In [ ]:
import pandas as pd
import numpy as np
import json
import datetime

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
from sklearn.metrics import silhouette_score, mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics.pairwise import haversine_distances
from sklearn.neighbors import BallTree

import plotly.express as px
import plotly.graph_objects as go

import geopandas as gpd
from shapely.geometry import Point, Polygon, MultiPoint, box
from folium import GeoJson, Marker
from geopy.distance import geodesic
import pygeohash as pgh
import geohash2 as geohash

from math import radians, cos, sin, asin, sqrt
from scipy.spatial import cKDTree
from scipy.stats import chi2_contingency

from prophet import Prophet

from PIL import Image
from IPython.display import display

from tqdm import tqdm
import pyarrow
import openpyxl
In [2]:
df_station = pd.read_csv(r"C:\Users\asus\Desktop\5\2\DM\Homework\data\data\stations.csv")

df = pd.read_parquet(r"C:\Users\asus\Desktop\5\2\DM\Homework\data\data\daily-rent.parquet")


metro_df= pd.read_csv(r"C:\Users\asus\Desktop\5\2\DM\Homework\data\data\Metro_Bus_Stops.csv")
shuttle_df = pd.read_csv(r"C:\Users\asus\Desktop\5\2\DM\Homework\data\data\Shuttle_Bus_Stops.csv")

weather_df = pd.read_csv(r"C:\Users\asus\Desktop\5\2\DM\Homework\data2\Washington,DC,USA 2024-01-01 to 2024-12-31.csv") 

locations_df = pd.read_csv(r"C:\Users\asus\Desktop\5\2\DM\Homework\data\data\Capital_Bikeshare_Locations.csv")

cbd_gdf = gpd.read_file(r"C:\Users\asus\Desktop\5\2\DM\Homework\data2\Washington_DC_Boundary_Stone_Area.geojson")
parking_zones_gdf = gpd.read_file(r"C:\Users\asus\Desktop\5\2\DM\Homework\data\data\Residential_and_Visitor_Parking_Zones.geojson")
dc_boundary = gpd.read_file(r"C:\Users\asus\Desktop\5\2\DM\Homework\data2\Washington_DC_Boundary_Stone_Area.geojson")
Business_boundary =gpd.read_file(r"C:\Users\asus\Desktop\5\2\DM\Homework\data2\DDOT_Central_Business_District.geojson")

Prepresses : daily

In [3]:
len(df)
Out[3]:
6114323

الطلب الاول: تنظيف ومكاملة البيانات: a.سد الفجوات:

In [4]:
df.dtypes
Out[4]:
ride_id               string[python]
rideable_type         string[python]
started_at            datetime64[ns]
ended_at              datetime64[ns]
start_station_name    string[python]
start_station_id      string[python]
end_station_name      string[python]
end_station_id        string[python]
start_lat                    float64
start_lng                    float64
end_lat                      float64
end_lng                      float64
member_casual         string[python]
dtype: object
In [5]:
df['end_station_id'] = pd.to_numeric(df['end_station_id'], errors='coerce')


df['start_station_id'] = pd.to_numeric(df['start_station_id'], errors='coerce')
In [6]:
def clean_name(name):
    return str(name).strip().lower().replace('\xa0', '').replace('\n', '').replace('\r', '')
In [7]:
df['start_station_name'] = df['start_station_name'].map(clean_name)

df['end_station_name'] = df['end_station_name'].map(clean_name)

df_station['name'] = df_station['name'].map(clean_name)
In [8]:
df.isna().sum()
Out[8]:
ride_id                     0
rideable_type               0
started_at                  0
ended_at                    0
start_station_name          0
start_station_id      1190966
end_station_name            0
end_station_id        1237694
start_lat                   0
start_lng                   0
end_lat                  4650
end_lng                  4650
member_casual               0
dtype: int64
In [9]:
df['start_station_name'] = df['start_station_name'].replace('<na>', np.nan)
df['end_station_name'] = df['end_station_name'].replace('<na>', np.nan)
In [10]:
df.isna().sum()
Out[10]:
ride_id                     0
rideable_type               0
started_at                  0
ended_at                    0
start_station_name    1190966
start_station_id      1190966
end_station_name      1236167
end_station_id        1237694
start_lat                   0
start_lng                   0
end_lat                  4650
end_lng                  4650
member_casual               0
dtype: int64
In [11]:
invalid_mask = ~(
    df['start_lat'].between(38, 40) & 
    df['start_lng'].between(-78, -76)
)


print(f"عدد السطور التي لا تحقق الشرط الجغرافي: {invalid_mask.sum()}")


invalid_rows = df[invalid_mask]
print(invalid_rows[['start_station_name', 'start_lat', 'start_lng']].head(10))


df.loc[invalid_mask, ['start_lat', 'start_lng']] = np.nan
عدد السطور التي لا تحقق الشرط الجغرافي: 60
                                 start_station_name  start_lat  start_lng
74999                          glebe rd & 11th st n   0.000000   0.000000
142242               georgia & new hampshire ave nw   0.000000   0.000000
215570                         14th & belmont st nw   0.000000   0.000000
658823   pentagon city metro / 12th st & s hayes st   0.000000   0.000000
697252                       m st & delaware ave ne   0.000000   0.000000
728005                20th & o st nw / dupont south   0.000000   0.000000
781344                               17th & k st nw   0.000000   0.000000
814686                  new hampshire ave & t st nw   0.000000   0.000000
913346                columbia pike & s highland st   0.000000   0.000000
1003849             mount vernon ave & e nelson ave -81.346429 -78.366928
In [12]:
invalid_mask = ~(
    df['end_lat'].between(38, 40) & 
    df['end_lng'].between(-78, -76)
)


print(f"عدد السطور التي لا تحقق الشرط الجغرافي: {invalid_mask.sum()}")


invalid_rows = df[invalid_mask]
print(invalid_rows[['end_station_name', 'end_lat', 'end_lng']].head(10))


df.loc[invalid_mask, ['end_lat', 'end_lng']] = np.nan
عدد السطور التي لا تحقق الشرط الجغرافي: 4727
     end_station_name  end_lat  end_lng
16                NaN      NaN      NaN
36                NaN      NaN      NaN
275               NaN      NaN      NaN
297               NaN      NaN      NaN
309               NaN      NaN      NaN
1972              NaN      NaN      NaN
1995              NaN      NaN      NaN
1996              NaN      NaN      NaN
2018              NaN      NaN      NaN
2022              NaN      NaN      NaN
In [13]:
def calculate_station_centroids(df):

    start_df = df[['start_station_name', 'start_lat', 'start_lng']].rename(
        columns={'start_station_name': 'station_name', 'start_lat': 'lat', 'start_lng': 'lng'}
    )
    end_df = df[['end_station_name', 'end_lat', 'end_lng']].rename(
        columns={'end_station_name': 'station_name', 'end_lat': 'lat', 'end_lng': 'lng'}
    )
    

    combined = pd.concat([start_df, end_df], ignore_index=True)
    combined = combined.dropna(subset=['station_name', 'lat', 'lng']).drop_duplicates()

    def centroid(group):
        points = [Point(xy) for xy in zip(group['lng'], group['lat'])]
        if len(points) == 1:
            return points[0]
        else:
            return MultiPoint(points).centroid
    
    centroids = (
        combined.groupby('station_name', group_keys=False)
        .apply(centroid)
        .reset_index(name='geometry')
    )
    
    gdf_centroids = gpd.GeoDataFrame(centroids, geometry='geometry', crs='EPSG:4326')
    gdf_centroids['centroid_lat'] = gdf_centroids.geometry.y
    gdf_centroids['centroid_lng'] = gdf_centroids.geometry.x
    
    return gdf_centroids[['station_name', 'centroid_lat', 'centroid_lng']]


station_centroids = calculate_station_centroids(df)


df = df.merge(
    station_centroids.rename(columns={
        'station_name': 'start_station_name',
        'centroid_lat': 'start_lat',
        'centroid_lng': 'start_lng'
    }),
    on='start_station_name',
    how='left'
)

df = df.merge(
    station_centroids.rename(columns={
        'station_name': 'end_station_name',
        'centroid_lat': 'end_lat',
        'centroid_lng': 'end_lng'
    }),
    on='end_station_name',
    how='left'
)
In [14]:
unique_start_coords_count = df[['start_station_name','start_lat_y', 'start_lng_y']].drop_duplicates().dropna().shape[0]
print(f"عدد الأزواج الفريدة من خطوط الطول والعرض في بيانات البداية: {unique_start_coords_count}")

unique_end_coords_count = df[['end_station_name','end_lat_y', 'end_lng_y']].drop_duplicates().dropna().shape[0]
print(f"عدد الأزواج الفريدة من خطوط الطول والعرض في بيانات النهاية: {unique_end_coords_count}")
عدد الأزواج الفريدة من خطوط الطول والعرض في بيانات البداية: 817
عدد الأزواج الفريدة من خطوط الطول والعرض في بيانات النهاية: 817
In [15]:
print(df['started_at'].dt.year.value_counts())
print(df['ended_at'].dt.year.value_counts())
started_at
2024    6114303
1970         20
Name: count, dtype: int64
ended_at
2024    6114299
2030         20
2000          4
Name: count, dtype: int64
In [16]:
df = df[
    ~(
        (df['started_at'].dt.year == 1970) |
        (df['ended_at'].dt.year == 2030)|
       ( df['ended_at'].dt.year == 2000)

    )
]
In [17]:
print(df['started_at'].dt.year.value_counts())
print(df['ended_at'].dt.year.value_counts())
started_at
2024    6114279
Name: count, dtype: int64
ended_at
2024    6114279
Name: count, dtype: int64

Preprocess : station

In [18]:
df_station_sorted = df_station.sort_values(by=['name', 'id'])

df_station_unique = df_station_sorted.drop_duplicates(subset=['name'], keep='first').reset_index(drop=True)
In [19]:
df_station = df_station.sort_values(by=['name', 'id']).drop_duplicates(subset=['name'], keep='first').reset_index(drop=True)

وهون بنكتشف انو id غلط لانو انا عندي 819 محطة ف كيف id = 803?

In [20]:
num_unique_pairs = df_station[['id' ]].drop_duplicates().shape[0]
print(f"عدد القيم الفريدة   (id): {num_unique_pairs}")
عدد القيم الفريدة   (id): 803
In [21]:
num_unique_pairs = df_station[['id', 'name' ]].drop_duplicates().shape[0]
print(f"عدد القيم الفريدة لكل زوج (id, name): {num_unique_pairs}")
عدد القيم الفريدة لكل زوج (id, name): 819

المحطات يلي موجودة بالستيشن ومو موجودة برحلات اليومية وبنكتشف انو كل المحطات موجودة بس في منهن فقط كنهاية موجودين ومنهم فقط كبداية

In [22]:
end_names = set(df['end_station_name'].dropna().unique())
station_names = set(df_station['name'].dropna().unique())
missing_in_inside_dc =station_names - end_names
print(f"عددها: {len(missing_in_inside_dc)}")
for name in sorted(missing_in_inside_dc):
    print(" in df_station:", name)
عددها: 2
 in df_station: bowdoin ave & calvert rd/ college park metro
 in df_station: tysons west transit center
In [23]:
start_names = set(df['start_station_name'].dropna().unique())
station_names = set(df_station['name'].dropna().unique())
missing_in_inside_dc =station_names - start_names
print(f"عددها: {len(missing_in_inside_dc)}")
for name in sorted(missing_in_inside_dc):
    print("in df_station:", name)
عددها: 2
in df_station: eastern market metro/pillar
in df_station: tech trailer v-1
In [24]:
df_end = df[['end_station_id', 'end_station_name']].drop_duplicates()
merged = df_end.merge(df_station[['id', 'name']], left_on='end_station_name', right_on='name', how='inner')
mismatched_ids = merged[merged['end_station_id'] != merged['id']]
print(mismatched_ids[['end_station_name', 'end_station_id', 'id']])
                         end_station_name  end_station_id     id
871      randle circle & minnesota ave se           31799  31702
1107  john mccormack rd & michigan ave ne           31528  31502
In [25]:
df_end = df[['start_station_id', 'start_station_name']].drop_duplicates()
merged = df_end.merge(df_station[['id', 'name']], left_on='start_station_name', right_on='name', how='inner')
mismatched_ids = merged[merged['start_station_id'] != merged['id']]
print(mismatched_ids[['start_station_name', 'start_station_id', 'id']])
                      start_station_name  start_station_id     id
595     randle circle & minnesota ave se             31799  31702
801  john mccormack rd & michigan ave ne             31528  31502
In [26]:
df.loc[df['start_station_name'].str.lower() == 'john mccormack rd & michigan ave ne', 'start_station_id'] = 31502
df.loc[df['start_station_name'].str.lower() == 'randle circle & minnesota ave se', 'start_station_id'] = 31702
df.loc[df['end_station_name'].str.lower() == 'john mccormack rd & michigan ave ne', 'end_station_id'] = 31502
df.loc[df['end_station_name'].str.lower() == 'randle circle & minnesota ave se', 'end_station_id'] =31702
In [27]:
station_lookup = df[['start_station_name', 'start_lat_y', 'start_lng_y']].dropna().drop_duplicates()
station_lookup2 = df[['end_station_name', 'end_lat_y', 'end_lng_y']].dropna().drop_duplicates()
station_lookup = station_lookup.rename(columns={
    'start_station_name': 'name',
    'start_lat_y': 'lat',
    'start_lng_y': 'lng'
})

station_lookup2 = station_lookup2.rename(columns={
    'end_station_name': 'name',
    'end_lat_y': 'lat',
    'end_lng_y': 'lng'
})

combined_lookup = pd.concat([station_lookup, station_lookup2], ignore_index=True)
combined_lookup = combined_lookup.drop_duplicates(subset='name', keep='first')
df_station = df_station.merge(combined_lookup, how='left', left_on='name', right_on='name')
In [28]:
station_lookup = df_station[['name', 'lat', 'lng']].dropna().drop_duplicates()
gdf_station = gpd.GeoDataFrame(
    df_station,
    geometry=gpd.points_from_xy(df_station['lng'], df_station['lat']),
    crs='EPSG:4326'
)

if dc_boundary.crs != gdf_station.crs:
    dc_boundary = dc_boundary.to_crs(gdf_station.crs)

df_station['inside_dc'] = gdf_station.within(dc_boundary.unary_union)
C:\Users\asus\AppData\Local\Temp\ipykernel_12476\741496869.py:11: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead.
  df_station['inside_dc'] = gdf_station.within(dc_boundary.unary_union)
In [29]:
df_station['inside_dc'].value_counts()
Out[29]:
inside_dc
True     412
False    407
Name: count, dtype: int64
In [30]:
if Business_boundary.crs != gdf_station.crs:
    Business_boundary = Business_boundary.to_crs(gdf_station.crs)


df_station['inside_Business'] = gdf_station.within(Business_boundary.unary_union)
C:\Users\asus\AppData\Local\Temp\ipykernel_12476\2608659575.py:5: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead.
  df_station['inside_Business'] = gdf_station.within(Business_boundary.unary_union)
In [31]:
df_station['inside_Business'].value_counts()
Out[31]:
inside_Business
False    747
True      72
Name: count, dtype: int64
In [ ]:
dc_boundary = dc_boundary.to_crs(epsg=4326)
Business_boundary = Business_boundary.to_crs(epsg=4326)
dc_geojson = json.loads(dc_boundary.to_json())
business_geojson = json.loads(Business_boundary.to_json())

"""
fig = px.choropleth_mapbox(
    dc_boundary,
    geojson=dc_geojson,
    locations=dc_boundary.index,  # أي عمود عادي (لأننا بس بدنا نظهر الشكل)
    color_discrete_sequence=["#888888"],  # لون حدود المدينة
    center={"lat": 38.9, "lon": -77.03},  # مركز الخريطة
    zoom=10,
    mapbox_style="carto-positron",
    opacity=0.4
)

fig2 = px.choropleth_mapbox(
    Business_boundary,
    geojson=business_geojson,
    locations=Business_boundary.index,
    color_discrete_sequence=["#FF0000"],  # لون المنطقة التجارية (أحمر)
    center={"lat": 38.9, "lon": -77.03},
    zoom=10,
    mapbox_style="carto-positron",
    opacity=0.6
)

# px لا يدعم دمج مباشر → لكن يمكننا استخدام fig.add_trace
for trace in fig2.data:
    fig.add_trace(trace)

fig.update_layout(title="حدود واشنطن مع حدود المنطقة التجارية")
fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\ws.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image

رسم مواقع المحطات بنسبة لواشنطن

In [ ]:
if dc_boundary.crs != "EPSG:4326":
    dc_boundary = dc_boundary.to_crs(epsg=4326)

dc_geojson = dc_boundary.__geo_interface__

geometry = [Point(xy) for xy in zip(station_lookup['lng'], station_lookup['lat'])]
gdf_stations = gpd.GeoDataFrame(station_lookup.copy(), geometry=geometry, crs="EPSG:4326")

center_lat = 38.9072
center_lon = -77.0369

"""
fig = px.choropleth_mapbox(
    dc_boundary,
    geojson=dc_geojson,
    locations=dc_boundary.index,
    color_discrete_sequence=["#cccccc"],  # لون رمادي أنعم
    center={"lat": center_lat, "lon": center_lon},
    mapbox_style="carto-positron",
    zoom=9,  # أقرب قليلاً
    opacity=0.3
)

fig.add_trace(go.Scattermapbox(
    lat=gdf_stations['lat'],
    lon=gdf_stations['lng'],
    mode='markers',
    marker=go.scattermapbox.Marker(
        size=3,
        color='red',
        opacity=0.8
    ),
    text=gdf_stations['name'],  # يظهر اسم المحطة عند المرور
    name='محطات الدراجات'
))

fig.update_layout(
    title="<b>حدود واشنطن + مواقع محطات الدراجات</b>",
    legend=dict(
        yanchor="top",
        y=0.98,
        xanchor="left",
        x=0.01
    ),
    margin={"r":0,"t":50,"l":0,"b":0},
    mapbox=dict(
        style="carto-positron",
        center=dict(lat=center_lat, lon=center_lon),
        zoom=11
    )
)
fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\sl.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image

Prepresses : shuttle metro

In [ ]:
common_columns = set(shuttle_df.columns).intersection(set(metro_df.columns))
print(" الأعمدة المشتركة:", common_columns)

if 'GIS_ID' in common_columns:
    
    shuttle_id_unique = shuttle_df['GIS_ID'].is_unique
    metro_id_unique = metro_df['GIS_ID'].is_unique
    print(f" هل 'id' فريد في shuttle؟ {shuttle_id_unique}")
    print(f" هل 'id' فريد في metro؟ {metro_id_unique}")
    
    common_ids = set(shuttle_df['GIS_ID']).intersection(set(metro_df['GIS_ID']))
    print(f" عدد القيم المتشابهة في id بين الملفين: {len(common_ids)}")
    print(" بعض القيم المتشابهة:", list(common_ids)[:10])  
else:
    print(" لا يوجد عمود مشترك اسمه 'id'")
 الأعمدة المشتركة: {'GIS_ID', 'EDITED', 'Y', 'EDITOR', 'CREATED', 'SE_ANNO_CAD_DATA', 'GLOBALID', 'X', 'OBJECTID', 'CREATOR'}
 هل 'id' فريد في shuttle؟ True
 هل 'id' فريد في metro؟ True
 عدد القيم المتشابهة في id بين الملفين: 0
 بعض القيم المتشابهة: []
In [ ]:
shuttle_coords = set(zip(shuttle_df['LATITUDE'], shuttle_df['LONGITUDE']))
metro_coords = set(zip(metro_df['BSTP_LAT'], metro_df['BSTP_LON']))
common_coords = shuttle_coords.intersection(metro_coords)

print(f" عدد الإحداثيات المتطابقة بين Shuttle و Metro: {len(common_coords)}")
if common_coords:
    print(" أمثلة على الإحداثيات المشتركة:")
    for coord in list(common_coords)[:5]:
        print(coord)
else:
    print(" لا توجد إحداثيات متطابقة .")
 عدد الإحداثيات المتطابقة بين Shuttle و Metro: 0
 لا توجد إحداثيات متطابقة .
In [ ]:
metro_df['full_address'] = metro_df['AT_STR'].fillna('') + ' ' + metro_df['ON_STR'].fillna('')
metro_df['full_address'] = metro_df['full_address'].str.strip().str.upper()
shuttle_df['ADDRESS_clean'] = shuttle_df['ADDRESS'].str.strip().str.upper()
metro_df['full_address']
Out[ ]:
0                 44TH ST NW JENIFER ST NW
1             HARBOUR WAY MITCHELLVILLE RD
2        SLEEPY HOLLOW MANOR COLUMBIA PIKE
3                CUSHMAN PL WAYNEWOOD BLVD
4                 PRICES LN VERNON VIEW DR
                       ...                
10039             OLD MILL RD RICHMOND HWY
10040                 FRYE RD RICHMOND HWY
10041                   #12127 OLD FORT RD
10042          BUS BAY B FT WASHINGTON LOT
10043                    MERCK PL GABLE LN
Name: full_address, Length: 10044, dtype: object
In [ ]:
print("هل (BSTP_LAT, BSTP_LON) فريدة في Metro؟", metro_df.duplicated(subset=['BSTP_LAT', 'BSTP_LON']).any() == False)
print("هل (LATITUDE, LONGITUDE) فريدة في Shuttle؟", shuttle_df.duplicated(subset=['LATITUDE', 'LONGITUDE']).any() == False)
هل (BSTP_LAT, BSTP_LON) فريدة في Metro؟ True
هل (LATITUDE, LONGITUDE) فريدة في Shuttle؟ False
In [ ]:
shuttle_df.drop_duplicates(subset=['LATITUDE', 'LONGITUDE'], keep='first',inplace=True)
In [ ]:
print("هل (BSTP_LAT, BSTP_LON) فريدة في Metro؟", metro_df.duplicated(subset=['BSTP_LAT', 'BSTP_LON']).any() == False)
print("هل (LATITUDE, LONGITUDE) فريدة في Shuttle؟", shuttle_df.duplicated(subset=['LATITUDE', 'LONGITUDE']).any() == False)
هل (BSTP_LAT, BSTP_LON) فريدة في Metro؟ True
هل (LATITUDE, LONGITUDE) فريدة في Shuttle؟ True
In [ ]:
shuttle_df = shuttle_df[['MAR_ID', 'GIS_ID', 'LATITUDE', 'LONGITUDE', 'ADDRESS']].copy()
shuttle_df.rename(columns={'MAR_ID': 'ID'}, inplace=True)
metro_df = metro_df[['EGIS_ID', 'GIS_ID', 'BSTP_LAT', 'BSTP_LON', 'full_address']].copy()
metro_df.rename(columns={'EGIS_ID': 'ID', 'BSTP_LAT': 'LATITUDE', 'BSTP_LON': 'LONGITUDE', 'full_address': 'ADDRESS'}, inplace=True)
In [ ]:
invalid_mask = ~(
    shuttle_df['LATITUDE'].between(38, 40) & 
    shuttle_df['LONGITUDE'].between(-78, -76)
)


print(f"عدد السطور التي لا تحقق الشرط الجغرافي: {invalid_mask.sum()}")

invalid_mask = ~(
    metro_df['LATITUDE'].between(38, 40) & 
    metro_df['LONGITUDE'].between(-78, -76)
)


print(f"عدد السطور التي لا تحقق الشرط الجغرافي: {invalid_mask.sum()}")
عدد السطور التي لا تحقق الشرط الجغرافي: 0
عدد السطور التي لا تحقق الشرط الجغرافي: 0
In [40]:
metro_df.isna().sum()
Out[40]:
ID           0
GIS_ID       0
LATITUDE     0
LONGITUDE    0
ADDRESS      0
dtype: int64
In [41]:
unique_start_coords_count = metro_df[['GIS_ID','LATITUDE', 'LONGITUDE']].drop_duplicates().dropna().shape[0]
print(f"عدد الأزواج الفريدة من خطوط الطول والعرض في بيانات ميترو: {unique_start_coords_count}")

unique_end_coords_count = shuttle_df[['GIS_ID','LATITUDE', 'LONGITUDE']].drop_duplicates().dropna().shape[0]
print(f"عدد الأزواج الفريدة من خطوط الطول والعرض في بيانات شتل: {unique_end_coords_count}")
عدد الأزواج الفريدة من خطوط الطول والعرض في بيانات ميترو: 10044
عدد الأزواج الفريدة من خطوط الطول والعرض في بيانات شتل: 96
In [ ]:
columns_to_drop = ['X', 'Y', 'GIS_ID', 'GIS_LAST_MOD_DTTM', 'OBJECTID','IOS',
       'ANDROID','GIS_ID' ]
locations_df.drop(columns=columns_to_drop)
Out[ ]:
NAME STATION_TYPE STATION_ID STATION_STATUS LAST_REPORTED NUM_DOCKS_AVAILABLE NUM_DOCKS_DISABLED NUM_BIKES_AVAILABLE NUM_EBIKES_AVAILABLE NUM_BIKES_DISABLED ... IS_RENTING HAS_KIOSK ELECTRIC_BIKE_SURCHARGE_WAIVER EIGHTD_HAS_KEY_DISPENSER CAPACITY RENTAL_METHODS REGION_ID REGION_NAME LATITUDE LONGITUDE
0 Lincoln Memorial classic 08254284-1f3f-11e7-bf6b-3863bb334450 NaN 2025/05/05 20:25:51+00 17 0 8 0 0 ... YES YES NO NO 25 KEY,CREDITCARD 42.0 DCA-CABI 38.888255 -77.049437
1 W&OD Trail/Sunset Hills Rd & Isaac Newton Sq classic 08263fbd-1f3f-11e7-bf6b-3863bb334450 NaN 2025/05/05 20:25:51+00 15 0 3 0 1 ... YES YES NO NO 19 KEY,CREDITCARD 104.0 DCA-CABI 38.951419 -77.340281
2 17th St & Independence Ave SW classic 082623bf-1f3f-11e7-bf6b-3863bb334450 NaN 2025/05/05 20:25:51+00 10 0 6 4 1 ... YES YES NO NO 19 KEY,CREDITCARD 42.0 DCA-CABI 38.888097 -77.038325
3 8th & D St NW classic 08256ac9-1f3f-11e7-bf6b-3863bb334450 NaN 2025/05/05 20:25:49+00 0 0 24 0 0 ... YES NO NO NO 24 KEY,CREDITCARD 42.0 DCA-CABI 38.894851 -77.023240
4 Anacostia Ave & Benning Rd NE / River Terrace classic 082518eb-1f3f-11e7-bf6b-3863bb334450 NaN 2025/05/05 20:25:49+00 4 0 10 0 1 ... YES YES NO NO 15 KEY,CREDITCARD 42.0 DCA-CABI 38.896544 -76.960120
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
789 Oxon Hill Park & Ride classic ccd82461-11b7-4553-b70b-6cb7e5e4f8c2 NaN 2025/05/05 20:25:58+00 12 0 3 0 0 ... YES YES NO NO 15 KEY,CREDITCARD 133.0 DCA-CABI 38.798749 -77.001030
790 Fleet St & Ritchie Pkwy classic 0825b42a-1f3f-11e7-bf6b-3863bb334450 NaN 2025/05/05 20:25:58+00 10 0 4 1 1 ... YES YES NO NO 15 KEY,CREDITCARD 43.0 DCA-CABI 39.076331 -77.141378
791 Pennsylvania & Minnesota Ave SE classic 082498ac-1f3f-11e7-bf6b-3863bb334450 NaN 2025/05/05 20:26:00+00 4 0 7 0 0 ... YES YES NO NO 11 KEY,CREDITCARD 42.0 DCA-CABI 38.873057 -76.971015
792 Briggs Chaney Park & Ride classic ac598bfa-89c4-4229-a397-a6279b4cd536 NaN 2025/05/05 20:26:00+00 14 0 9 0 0 ... YES YES NO NO 23 KEY,CREDITCARD 44.0 DCA-CABI 39.077875 -76.941867
793 Old Meadow Rd & Cathy Ln classic 1890899920583743818 NaN 2025/05/05 20:26:01+00 12 0 0 0 0 ... YES YES NO NO 12 KEY,CREDITCARD NaN DCA-CABI 38.916703 -77.214448

794 rows × 22 columns

In [43]:
print(locations_df.columns)
print(f" Columns num: {len(locations_df.columns)}")
print(f"Rows num: {len(locations_df)}")
Index(['X', 'Y', 'NAME', 'STATION_TYPE', 'STATION_ID', 'STATION_STATUS',
       'LAST_REPORTED', 'NUM_DOCKS_AVAILABLE', 'NUM_DOCKS_DISABLED',
       'NUM_BIKES_AVAILABLE', 'NUM_EBIKES_AVAILABLE', 'NUM_BIKES_DISABLED',
       'IS_INSTALLED', 'IS_RETURNING', 'IS_RENTING', 'HAS_KIOSK', 'IOS',
       'ANDROID', 'ELECTRIC_BIKE_SURCHARGE_WAIVER', 'EIGHTD_HAS_KEY_DISPENSER',
       'CAPACITY', 'RENTAL_METHODS', 'REGION_ID', 'REGION_NAME', 'GIS_ID',
       'LATITUDE', 'LONGITUDE', 'GIS_LAST_MOD_DTTM', 'OBJECTID'],
      dtype='object')
 Columns num: 29
Rows num: 794
In [44]:
print(df_station.columns)
print(f" Columns num: {len(df_station.columns)}")
print(f"Rows num: {len(df_station)}")
Index(['id', 'name', 'lat', 'lng', 'inside_dc', 'inside_Business'], dtype='object')
 Columns num: 6
Rows num: 819
In [ ]:
columns_to_drop = ['OBJECTID', 'ID', 'GIS_ID', 'GLOBALID', 'CREATOR', 'CREATED',
                   'EDITOR', 'EDITED', 'SHAPEAREA', 'SHAPELEN']
cbd_gdf.drop(columns=columns_to_drop, inplace=True, errors='ignore')
In [46]:
print(cbd_gdf.columns)
print(f" Columns num: {len(cbd_gdf.columns)}")
print(f"Rows num: {cbd_gdf.shape}")
Index(['CITY_NAME', 'STATE_CITY', 'WEB_URL', 'AREAKM', 'AREAMILES',
       'geometry'],
      dtype='object')
 Columns num: 6
Rows num: (1, 6)
In [ ]:
columns_to_drop = ['description', 'icon', 'moonphase']
weather_df.drop(columns=columns_to_drop, inplace=True, errors='ignore')
In [ ]:
print(locations_df.isna().sum())
X                                   0
Y                                   0
NAME                                0
STATION_TYPE                        0
STATION_ID                          0
STATION_STATUS                    794
LAST_REPORTED                       0
NUM_DOCKS_AVAILABLE                 0
NUM_DOCKS_DISABLED                  0
NUM_BIKES_AVAILABLE                 0
NUM_EBIKES_AVAILABLE                0
NUM_BIKES_DISABLED                  0
IS_INSTALLED                        0
IS_RETURNING                        0
IS_RENTING                          0
HAS_KIOSK                           0
IOS                                 0
ANDROID                             0
ELECTRIC_BIKE_SURCHARGE_WAIVER      0
EIGHTD_HAS_KEY_DISPENSER            0
CAPACITY                            0
RENTAL_METHODS                      0
REGION_ID                          50
REGION_NAME                         0
GIS_ID                            794
LATITUDE                            0
LONGITUDE                           0
GIS_LAST_MOD_DTTM                   0
OBJECTID                            0
dtype: int64
In [ ]:
print(locations_df['STATION_TYPE'].unique())
['classic']
In [ ]:
locations_df['STATION_TYPE'] = locations_df.apply(
    lambda row: 'classic' if row['HAS_KIOSK'] == 'YES' and row['CAPACITY'] > 0 else 'lightweight',
    axis=1
)
In [ ]:
print(locations_df['STATION_TYPE'].unique())
['classic' 'lightweight']
In [52]:
status_counts = locations_df['STATION_TYPE'].value_counts()
print(status_counts)
STATION_TYPE
classic        761
lightweight     33
Name: count, dtype: int64
In [ ]:
print(locations_df['STATION_STATUS'].unique())
[nan]
In [ ]:
def determine_station_status(row):
    if row['IS_INSTALLED'] == 'YES' and row['IS_RENTING'] == 'YES' and row['IS_RETURNING'] == 'YES':
        return 'active'
    elif row['IS_INSTALLED'] == 'NO' or row['IS_RENTING'] == 'NO' or row['IS_RETURNING'] == 'NO':
        return 'out_of_service'
    elif pd.isna(row['IS_INSTALLED']) and pd.isna(row['IS_RENTING']) and pd.isna(row['IS_RETURNING']):
        return 'planned'
    else:
        return 'out_of_service'

locations_df['STATION_STATUS'] = locations_df.apply(determine_station_status, axis=1)
In [ ]:
print(locations_df['STATION_STATUS'].unique())
['active' 'out_of_service']
In [56]:
status_counts = locations_df['STATION_STATUS'].value_counts()
print(status_counts)
STATION_STATUS
active            790
out_of_service      4
Name: count, dtype: int64
In [ ]:
name_variants = locations_df.groupby(locations_df['NAME'].str.lower())['NAME'].nunique()
case_mismatches = name_variants[name_variants > 1]
print("Stations with same name but different casing:\n", case_mismatches)

duplicate_ids = locations_df.groupby('NAME')['STATION_ID'].nunique()
stations_with_multiple_ids = duplicate_ids[duplicate_ids > 1]
print("Stations with more than one ID:\n", stations_with_multiple_ids)
Stations with same name but different casing:
 Series([], Name: NAME, dtype: int64)
Stations with more than one ID:
 Series([], Name: STATION_ID, dtype: int64)
In [58]:
unique_id_count = locations_df['STATION_ID'].nunique()
print("Number of unique STATION_IDs:", unique_id_count)


unique_station_names = locations_df['NAME'].nunique()
print("Number of unique station names:", unique_station_names)

unique_station_names_case_insensitive = locations_df['NAME'].str.lower().nunique()
print("Number of unique station names (case-insensitive):", unique_station_names_case_insensitive)
Number of unique STATION_IDs: 794
Number of unique station names: 794
Number of unique station names (case-insensitive): 794
In [ ]:
print(locations_df['NUM_DOCKS_DISABLED'].value_counts().sort_index())
NUM_DOCKS_DISABLED
0    783
1     10
7      1
Name: count, dtype: int64
In [ ]:
locations_df['HAS_KIOSK'].value_counts()
Out[ ]:
HAS_KIOSK
YES    761
NO      33
Name: count, dtype: int64
In [ ]:
kiosk_comparison = locations_df.groupby('HAS_KIOSK')['NUM_BIKES_AVAILABLE'].sum().reset_index()
kiosk_comparison.columns = ['Has Kiosk', 'Total Available Bikes']
print(kiosk_comparison.to_string(index=False))
Has Kiosk  Total Available Bikes
       NO                    290
      YES                   4807
In [ ]:
kiosk_comparison_df = locations_df[['NAME', 'HAS_KIOSK', 'NUM_BIKES_AVAILABLE','CAPACITY']]
display(kiosk_comparison_df.sort_values(by='HAS_KIOSK'))
NAME HAS_KIOSK NUM_BIKES_AVAILABLE CAPACITY
261 21st & L St NW NO 14 16
702 Monroe Ave & Leslie Ave NO 16 16
481 Rolfe St & 9th St S NO 3 12
453 Eastern Market Metro NO 2 32
615 20th & M St NW NO 9 24
... ... ... ... ...
278 Rhode Island Ave Metro YES 2 19
279 Emma Lee St & Morris St YES 1 12
280 10th St & L'Enfant Plaza SW YES 13 23
307 4th & Florida Ave NE YES 11 19
793 Old Meadow Rd & Cathy Ln YES 0 12

794 rows × 4 columns

In [ ]:
capacity_mismatch = locations_df[
    locations_df['CAPACITY'] != (locations_df['NUM_DOCKS_AVAILABLE'] + locations_df['NUM_DOCKS_DISABLED'])
]

print("Number of mismatches:", len(capacity_mismatch))
Number of mismatches: 736
In [ ]:
capacity_mismatch[['NAME', 'CAPACITY', 'NUM_DOCKS_AVAILABLE', 'NUM_DOCKS_DISABLED']]
Out[ ]:
NAME CAPACITY NUM_DOCKS_AVAILABLE NUM_DOCKS_DISABLED
0 Lincoln Memorial 25 17 0
1 W&OD Trail/Sunset Hills Rd & Isaac Newton Sq 19 15 0
2 17th St & Independence Ave SW 19 10 0
3 8th & D St NW 24 0 0
4 Anacostia Ave & Benning Rd NE / River Terrace 15 4 0
... ... ... ... ...
788 Ward Circle / American University 15 13 0
789 Oxon Hill Park & Ride 15 12 0
790 Fleet St & Ritchie Pkwy 15 10 0
791 Pennsylvania & Minnesota Ave SE 11 4 0
792 Briggs Chaney Park & Ride 23 14 0

736 rows × 4 columns

In [ ]:
es = locations_df[locations_df['NUM_EBIKES_AVAILABLE'] > locations_df['NUM_BIKES_AVAILABLE']]
print("Stations where electric bikes > total bikes:", len(invalid_ebikes))
invalid_docks = locations_df[
    (locations_df['NUM_DOCKS_AVAILABLE'] + locations_df['NUM_DOCKS_DISABLED']) > locations_df['CAPACITY']
]
print("Stations where available + disabled docks > capacity:", len(invalid_docks))

not_installed = locations_df[locations_df['IS_INSTALLED'] == False]
violating_not_installed = not_installed[
    (not_installed[['NUM_BIKES_AVAILABLE', 'NUM_EBIKES_AVAILABLE', 'NUM_BIKES_DISABLED',
                    'NUM_DOCKS_AVAILABLE', 'NUM_DOCKS_DISABLED']] != 0).any(axis=1) |
    (not_installed[['IS_RENTING', 'IS_RETURNING']] != False).any(axis=1)
]
print("Stations marked not installed but still active:", len(violating_not_installed))

invalid_returning = locations_df[
    (locations_df['IS_RETURNING'] == False) & (locations_df['NUM_DOCKS_AVAILABLE'] > 0)
]
print("Stations not accepting returns but reporting available docks:", len(invalid_returning))

invalid_renting = locations_df[
    (locations_df['IS_RENTING'] == False) &
    ((locations_df['NUM_BIKES_AVAILABLE'] > 0) | (locations_df['NUM_EBIKES_AVAILABLE'] > 0))
]
print("Stations not renting but reporting available bikes:", len(invalid_renting))


invalid_kiosk = locations_df[
    (locations_df['HAS_KIOSK'] == False) & (locations_df['STATION_TYPE'] == 'classic')
]
print("Classic stations marked without a kiosk :", len(invalid_kiosk))
Stations where electric bikes > total bikes: 0
Stations where available + disabled docks > capacity: 0
Stations marked not installed but still active: 0
Stations not accepting returns but reporting available docks: 0
Stations not renting but reporting available bikes: 0
Classic stations marked without a kiosk : 0

Prepresses : طقس hana

In [ ]:
print(weather_df.columns)
print(f"Columns num : {len(weather_df.columns)}")
print(f"Rows num: {len(weather_df)}")
Index(['name', 'datetime', 'tempmax', 'tempmin', 'temp', 'humidity',
       'windspeed', 'windspeedmax', 'windspeedmean', 'windspeedmin',
       'cloudcover', 'sunrise', 'sunset', 'conditions', 'temp_range',
       'weather_bin', 'date'],
      dtype='object')
Columns num : 17
Rows num: 366
In [ ]:
weather_df.dtypes
Out[ ]:
name              object
datetime          object
tempmax          float64
tempmin          float64
temp             float64
humidity         float64
windspeed        float64
windspeedmax     float64
windspeedmean    float64
windspeedmin     float64
cloudcover       float64
sunrise           object
sunset            object
conditions        object
temp_range        object
weather_bin       object
date              object
dtype: object
In [ ]:
weather_df['datetime'] = pd.to_datetime(weather_df['datetime'], errors='coerce')
weather_df['sunrise'] = pd.to_datetime(weather_df['sunrise'], errors='coerce')
weather_df['sunset'] = pd.to_datetime(weather_df['sunset'], errors='coerce')
In [ ]:
print(weather_df.dtypes)
name                     object
datetime         datetime64[ns]
tempmax                 float64
tempmin                 float64
temp                    float64
humidity                float64
windspeed               float64
windspeedmax            float64
windspeedmean           float64
windspeedmin            float64
cloudcover              float64
sunrise          datetime64[ns]
sunset           datetime64[ns]
conditions               object
temp_range               object
weather_bin              object
date                     object
dtype: object
In [70]:
weather_df = weather_df.sort_values('datetime')

duplicates = weather_df['datetime'].duplicated().sum()
print(f"Number of duplicated dates: {duplicates}")

full_range = pd.date_range(start=weather_df['datetime'].min(), end=weather_df['datetime'].max(), freq='D')
missing_dates = full_range.difference(weather_df['datetime'])
print(f"Missing dates:\n{missing_dates}")
Number of duplicated dates: 0
Missing dates:
DatetimeIndex([], dtype='datetime64[ns]', freq='D')
In [ ]:
weather_df['datetime'] = pd.to_datetime(weather_df['datetime'], errors='coerce')

print(weather_df['datetime'].min(), "to", weather_df['datetime'].max())
print(weather_df['datetime'].dt.date.value_counts().sort_index())
2024-01-01 00:00:00 to 2024-12-31 00:00:00
datetime
2024-01-01    1
2024-01-02    1
2024-01-03    1
2024-01-04    1
2024-01-05    1
             ..
2024-12-27    1
2024-12-28    1
2024-12-29    1
2024-12-30    1
2024-12-31    1
Name: count, Length: 366, dtype: int64
In [ ]:
weather_counts = weather_df['conditions'].value_counts()

print("All weather conditions and their day counts:")
print(weather_counts)
All weather conditions and their day counts:
conditions
Partially cloudy                197
Rain, Partially cloudy           74
Rain, Overcast                   59
Clear                            24
Overcast                          6
Snow, Rain, Overcast              3
Snow, Rain, Partially cloudy      2
Snow, Partially cloudy            1
Name: count, dtype: int64
In [ ]:
rainy_days = weather_df[weather_df['conditions'].str.contains("Rain", case=False, na=False)]
sunny_days = weather_df[weather_df['conditions'].str.contains("Clear", case=False, na=False)]

print("All rainy days:")
print(rainy_days[['datetime', 'conditions']].to_string(index=False))

print("\nAll sunny days:")
print(sunny_days[['datetime', 'conditions']].to_string(index=False))
All rainy days:
  datetime                   conditions
2024-01-01               Rain, Overcast
2024-01-06               Rain, Overcast
2024-01-09               Rain, Overcast
2024-01-10       Rain, Partially cloudy
2024-01-12       Rain, Partially cloudy
2024-01-13       Rain, Partially cloudy
2024-01-15         Snow, Rain, Overcast
2024-01-16 Snow, Rain, Partially cloudy
2024-01-19         Snow, Rain, Overcast
2024-01-25               Rain, Overcast
2024-01-26       Rain, Partially cloudy
2024-01-27       Rain, Partially cloudy
2024-01-28               Rain, Overcast
2024-02-02               Rain, Overcast
2024-02-11               Rain, Overcast
2024-02-12               Rain, Overcast
2024-02-13       Rain, Partially cloudy
2024-02-17 Snow, Rain, Partially cloudy
2024-02-23       Rain, Partially cloudy
2024-02-24       Rain, Partially cloudy
2024-02-27       Rain, Partially cloudy
2024-02-28               Rain, Overcast
2024-03-01       Rain, Partially cloudy
2024-03-02               Rain, Overcast
2024-03-04               Rain, Overcast
2024-03-05       Rain, Partially cloudy
2024-03-06               Rain, Overcast
2024-03-07       Rain, Partially cloudy
2024-03-09               Rain, Overcast
2024-03-10       Rain, Partially cloudy
2024-03-15       Rain, Partially cloudy
2024-03-22       Rain, Partially cloudy
2024-03-23               Rain, Overcast
2024-03-27               Rain, Overcast
2024-03-28               Rain, Overcast
2024-03-31       Rain, Partially cloudy
2024-04-01               Rain, Overcast
2024-04-02               Rain, Overcast
2024-04-03               Rain, Overcast
2024-04-04       Rain, Partially cloudy
2024-04-11               Rain, Overcast
2024-04-12       Rain, Partially cloudy
2024-04-15       Rain, Partially cloudy
2024-04-19               Rain, Overcast
2024-04-20       Rain, Partially cloudy
2024-04-27               Rain, Overcast
2024-05-01       Rain, Partially cloudy
2024-05-04               Rain, Overcast
2024-05-05               Rain, Overcast
2024-05-06               Rain, Overcast
2024-05-07       Rain, Partially cloudy
2024-05-09       Rain, Partially cloudy
2024-05-10               Rain, Overcast
2024-05-11       Rain, Partially cloudy
2024-05-12       Rain, Partially cloudy
2024-05-14               Rain, Overcast
2024-05-15               Rain, Overcast
2024-05-18               Rain, Overcast
2024-05-23       Rain, Partially cloudy
2024-05-24       Rain, Partially cloudy
2024-05-25       Rain, Partially cloudy
2024-05-26       Rain, Partially cloudy
2024-05-27       Rain, Partially cloudy
2024-05-28       Rain, Partially cloudy
2024-05-29       Rain, Partially cloudy
2024-05-30       Rain, Partially cloudy
2024-06-03       Rain, Partially cloudy
2024-06-05               Rain, Overcast
2024-06-10       Rain, Partially cloudy
2024-06-11       Rain, Partially cloudy
2024-06-14       Rain, Partially cloudy
2024-06-26       Rain, Partially cloudy
2024-06-27       Rain, Partially cloudy
2024-06-30       Rain, Partially cloudy
2024-07-04       Rain, Partially cloudy
2024-07-10       Rain, Partially cloudy
2024-07-12               Rain, Overcast
2024-07-13       Rain, Partially cloudy
2024-07-17       Rain, Partially cloudy
2024-07-20       Rain, Partially cloudy
2024-07-21       Rain, Partially cloudy
2024-07-22               Rain, Overcast
2024-07-23       Rain, Partially cloudy
2024-07-24               Rain, Overcast
2024-07-25       Rain, Partially cloudy
2024-07-29       Rain, Partially cloudy
2024-07-30               Rain, Overcast
2024-07-31       Rain, Partially cloudy
2024-08-03       Rain, Partially cloudy
2024-08-07       Rain, Partially cloudy
2024-08-08               Rain, Overcast
2024-08-09               Rain, Overcast
2024-08-10       Rain, Partially cloudy
2024-08-17       Rain, Partially cloudy
2024-08-19       Rain, Partially cloudy
2024-08-26       Rain, Partially cloudy
2024-08-29       Rain, Partially cloudy
2024-08-30               Rain, Overcast
2024-08-31               Rain, Overcast
2024-09-01       Rain, Partially cloudy
2024-09-04       Rain, Partially cloudy
2024-09-07       Rain, Partially cloudy
2024-09-17               Rain, Overcast
2024-09-18               Rain, Overcast
2024-09-22               Rain, Overcast
2024-09-23               Rain, Overcast
2024-09-24               Rain, Overcast
2024-09-25               Rain, Overcast
2024-09-26               Rain, Overcast
2024-09-27               Rain, Overcast
2024-09-28       Rain, Partially cloudy
2024-09-29               Rain, Overcast
2024-09-30               Rain, Overcast
2024-10-01               Rain, Overcast
2024-10-02               Rain, Overcast
2024-10-03       Rain, Partially cloudy
2024-11-10       Rain, Partially cloudy
2024-11-11       Rain, Partially cloudy
2024-11-14               Rain, Overcast
2024-11-15       Rain, Partially cloudy
2024-11-19       Rain, Partially cloudy
2024-11-20               Rain, Overcast
2024-11-21       Rain, Partially cloudy
2024-11-22       Rain, Partially cloudy
2024-11-26       Rain, Partially cloudy
2024-11-28       Rain, Partially cloudy
2024-12-09       Rain, Partially cloudy
2024-12-10               Rain, Overcast
2024-12-11               Rain, Overcast
2024-12-15               Rain, Overcast
2024-12-16               Rain, Overcast
2024-12-18       Rain, Partially cloudy
2024-12-19       Rain, Partially cloudy
2024-12-20               Rain, Overcast
2024-12-24         Snow, Rain, Overcast
2024-12-28               Rain, Overcast
2024-12-29               Rain, Overcast
2024-12-31       Rain, Partially cloudy

All sunny days:
  datetime conditions
2024-01-17      Clear
2024-01-21      Clear
2024-02-03      Clear
2024-02-04      Clear
2024-02-06      Clear
2024-02-19      Clear
2024-02-29      Clear
2024-03-24      Clear
2024-03-25      Clear
2024-05-31      Clear
2024-06-22      Clear
2024-10-06      Clear
2024-10-08      Clear
2024-10-09      Clear
2024-10-11      Clear
2024-10-18      Clear
2024-10-19      Clear
2024-10-20      Clear
2024-10-21      Clear
2024-10-22      Clear
2024-10-23      Clear
2024-10-25      Clear
2024-12-02      Clear
2024-12-03      Clear
In [ ]:
max_temp = weather_df['tempmax'].max()
max_temp_days = weather_df[weather_df['tempmax'] == max_temp]

print(f" Highest temperature: {max_temp}°C")
print(f"Occurred on {len(max_temp_days)} day(s) with these conditions:")
print(max_temp_days[['datetime', 'tempmax', 'conditions']].to_string(index=False))

min_temp = weather_df['tempmin'].min()
min_temp_days = weather_df[weather_df['tempmin'] == min_temp]

print(f"\n Lowest temperature: {min_temp}°C")
print(f"Occurred on {len(min_temp_days)} day(s) with these conditions:")
print(min_temp_days[['datetime', 'tempmin', 'conditions']].to_string(index=False))
 Highest temperature: 39.3°C
Occurred on 1 day(s) with these conditions:
  datetime  tempmax       conditions
2024-07-16     39.3 Partially cloudy

 Lowest temperature: -9.0°C
Occurred on 1 day(s) with these conditions:
  datetime  tempmin conditions
2024-01-17     -9.0      Clear
In [ ]:
max_humidity = weather_df['humidity'].max()
humid_days = weather_df[weather_df['humidity'] == max_humidity]
print(f"Highest humidity: {max_humidity}%")
print("Occurred on:")
print(humid_days[['datetime', 'humidity', 'conditions']])
Highest humidity: 91.7%
Occurred on:
      datetime  humidity      conditions
344 2024-12-10      91.7  Rain, Overcast
In [ ]:
max_wind = weather_df['windspeed'].max()
windiest_days = weather_df[weather_df['windspeed'] == max_wind]
print(f"Highest wind speed: {max_wind} km/h")
print("Occurred on:")
print(windiest_days[['datetime', 'windspeed', 'conditions']])
Highest wind speed: 50.8 km/h
Occurred on:
      datetime  windspeed        conditions
339 2024-12-05       50.8  Partially cloudy
In [ ]:
mixed_days = weather_df[
    weather_df['conditions'].str.contains("Rain", case=False, na=False) & 
    weather_df['conditions'].str.contains("Snow", case=False, na=False)
]

print(" Days with both Rain and Snow:")
print(mixed_days[['datetime', 'conditions', 'tempmin', 'tempmax', 'temp']])
 Days with both Rain and Snow:
      datetime                    conditions  tempmin  tempmax  temp
14  2024-01-15          Snow, Rain, Overcast     -3.4     -0.7  -1.9
15  2024-01-16  Snow, Rain, Partially cloudy     -6.7     -0.7  -2.1
18  2024-01-19          Snow, Rain, Overcast     -1.3      0.7  -0.3
47  2024-02-17  Snow, Rain, Partially cloudy     -0.1      4.9   2.4
358 2024-12-24          Snow, Rain, Overcast     -1.3      4.3   1.8
In [ ]:
bins = [-10, 0, 5, 10, 15, 20, 25, 30, 35, 40, 45]
labels = ['<0°C', '0–5°C', '5–10°C', '10–15°C', '15–20°C', '20–25°C', '25–30°C', '30–35°C', '35–40°C', '40+°C']
weather_df['temp_range'] = pd.cut(weather_df['temp'], bins=bins, labels=labels)

temp_counts = weather_df['temp_range'].value_counts().sort_index()

print("\n Most Frequent Average Temperature Ranges:")
print(temp_counts)
 Most Frequent Average Temperature Ranges:
temp_range
<0°C       10
0–5°C      36
5–10°C     56
10–15°C    70
15–20°C    48
20–25°C    69
25–30°C    64
30–35°C    13
35–40°C     0
40+°C       0
Name: count, dtype: int64
In [ ]:
combo_counts = weather_df.groupby(['temp_range', 'conditions']).size().sort_values(ascending=False)

print("\n Most Common Condition + Temp Range Combinations:")
print(combo_counts)
 Most Common Condition + Temp Range Combinations:
temp_range  conditions                  
20–25°C     Partially cloudy                47
10–15°C     Partially cloudy                39
25–30°C     Partially cloudy                32
            Rain, Partially cloudy          26
0–5°C       Partially cloudy                24
                                            ..
20–25°C     Snow, Rain, Overcast             0
            Snow, Rain, Partially cloudy     0
25–30°C     Clear                            0
            Overcast                         0
40+°C       Snow, Rain, Partially cloudy     0
Length: 80, dtype: int64
In [ ]:
weather_df['datetime'] = pd.to_datetime(weather_df['datetime'])
"""
fig = px.line(
    weather_df,
    x='datetime',
    y=['temp', 'humidity'],
    labels={'value': 'Measurement', 'variable': 'Metric'},
    title='Daily Temperature and Humidity in 2024'
)
fig.show()
"""

image_path = r'C:\Users\ASUS\OneDrive\Desktop\106.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image
In [ ]:
weather_df['datetime'] = pd.to_datetime(weather_df['datetime'])
time_series_metrics = [
    'temp',          
    'humidity',       
    'tempmax',        
    'tempmin',       
    'windspeed',     
    'cloudcover'      
]
"""

fig = px.line(
    weather_df,
    x='datetime',
    y=time_series_metrics,
    labels={'value': 'Measurement', 'variable': 'Metric'},
    title='Daily Weather Metrics in 2024'
)
fig.show()
"""

image_path = r'C:\Users\ASUS\OneDrive\Desktop\107.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image
In [80]:
parking_zones_gdf = parking_zones_gdf.drop(columns=['SHAPELEN', 'SHAPEAREA', 'WEB_URL', 'GIS_ID','OBJECTID','SHAPE_LENG'])
parking_zones_gdf = parking_zones_gdf.drop(columns=['CREATOR', 'CREATED', 'EDITOR', 'EDITED'])
In [ ]:
print(parking_zones_gdf.columns)
print(parking_zones_gdf.head())
print(f"Columns num : {len(parking_zones_gdf.columns)}")
print(f"Rows num: {len(parking_zones_gdf)}")
Index(['NAME', 'RPP_ZONE', 'ANC_ID', 'geometry'], dtype='object')
     NAME  RPP_ZONE ANC_ID                                           geometry
0  ANC 4A         4     4A  POLYGON ((-77.03331 38.98999, -77.03332 38.99,...
1  ANC 3G         3     3G  POLYGON ((-77.05161 38.98615, -77.05181 38.986...
2  ANC 4B         4     4B  POLYGON ((-76.99908 38.96328, -76.99909 38.963...
3  ANC 3F         3     3F  POLYGON ((-77.04702 38.96038, -77.04722 38.960...
4  ANC 4C         4     4C  POLYGON ((-77.02803 38.95622, -77.02804 38.956...
Columns num : 4
Rows num: 40
In [82]:
columns_to_check = [
    'NAME', 'RPP_ZONE', 'ANC_ID', 'geometry'
]

for col in columns_to_check:
    print(f"\nUnique values in '{col}':")
    print(parking_zones_gdf[col].unique())
Unique values in 'NAME':
['ANC 4A' 'ANC 3G' 'ANC 4B' 'ANC 3F' 'ANC 4C' 'ANC 4D' 'ANC 5A' 'ANC 5B'
 'ANC 3D' 'ANC 3C' 'ANC 1D' 'ANC 1A' 'ANC 5E' 'ANC 3B' 'ANC 1B' 'ANC 1C'
 'ANC 2D' 'ANC 2E' 'ANC 5D' 'ANC 2B' 'ANC 6E' 'ANC 2F' 'ANC 7C' 'ANC 6C'
 'ANC 2A' 'ANC 6A' 'ANC 2C' 'ANC 7F' 'ANC 6B' 'ANC 7E' 'ANC 6D' 'ANC 7B'
 'ANC 8A' 'ANC 8C' 'ANC 8B' 'ANC 8E' 'ANC 8D' 'ANC 3E' 'ANC 5C' 'ANC 7D']

Unique values in 'RPP_ZONE':
[4 3 5 1 2 6 7 8]

Unique values in 'ANC_ID':
['4A' '3G' '4B' '3F' '4C' '4D' '5A' '5B' '3D' '3C' '1D' '1A' '5E' '3B'
 '1B' '1C' '2D' '2E' '5D' '2B' '6E' '2F' '7C' '6C' '2A' '6A' '2C' '7F'
 '6B' '7E' '6D' '7B' '8A' '8C' '8B' '8E' '8D' '3E' '5C' '7D']

Unique values in 'geometry':
<GeometryArray>
[<POLYGON ((-77.033 38.99, -77.033 38.99, -77.041 38.996, -77.052 38.987, -77...>,
 <POLYGON ((-77.052 38.986, -77.052 38.986, -77.052 38.986, -77.052 38.986, -...>,
 <POLYGON ((-76.999 38.963, -76.999 38.963, -77.002 38.966, -77.002 38.966, -...>,
 <POLYGON ((-77.047 38.96, -77.047 38.96, -77.047 38.96, -77.048 38.96, -77.0...>,
 <POLYGON ((-77.028 38.956, -77.028 38.956, -77.028 38.957, -77.028 38.957, -...>,
 <POLYGON ((-77.019 38.958, -77.019 38.958, -77.019 38.958, -77.019 38.958, -...>,
 <POLYGON ((-76.991 38.957, -76.991 38.957, -76.992 38.957, -76.992 38.957, -...>,
 <POLYGON ((-76.982 38.95, -76.982 38.95, -76.983 38.95, -76.983 38.95, -76.9...>,
 <POLYGON ((-77.101 38.949, -77.101 38.949, -77.101 38.949, -77.101 38.949, -...>,
 <POLYGON ((-77.078 38.943, -77.078 38.943, -77.078 38.943, -77.078 38.943, -...>,
 <POLYGON ((-77.036 38.936, -77.036 38.937, -77.036 38.937, -77.036 38.937, -...>,
 <POLYGON ((-77.035 38.937, -77.036 38.937, -77.036 38.937, -77.036 38.937, -...>,
 <POLYGON ((-76.994 38.927, -76.994 38.928, -76.994 38.928, -76.994 38.929, -...>,
 <POLYGON ((-77.077 38.928, -77.077 38.928, -77.077 38.928, -77.077 38.928, -...>,
 <POLYGON ((-77.019 38.929, -77.019 38.929, -77.019 38.929, -77.019 38.929, -...>,
 <POLYGON ((-77.046 38.926, -77.047 38.926, -77.047 38.926, -77.047 38.926, -...>,
 <POLYGON ((-77.049 38.92, -77.05 38.92, -77.05 38.921, -77.05 38.921, -77.05...>,
 <POLYGON ((-77.067 38.919, -77.067 38.918, -77.067 38.918, -77.067 38.918, -...>,
 <POLYGON ((-76.98 38.908, -76.98 38.908, -76.98 38.908, -76.98 38.908, -76.9...>,
 <POLYGON ((-77.041 38.917, -77.041 38.917, -77.041 38.917, -77.041 38.917, -...>,
 <POLYGON ((-77.018 38.914, -77.018 38.914, -77.018 38.915, -77.018 38.915, -...>,
 <POLYGON ((-77.032 38.914, -77.032 38.914, -77.032 38.914, -77.033 38.914, -...>,
 <POLYGON ((-76.931 38.91, -76.931 38.91, -76.931 38.91, -76.931 38.91, -76.9...>,
 <POLYGON ((-76.995 38.901, -76.995 38.901, -76.995 38.902, -76.995 38.902, -...>,
 <POLYGON ((-77.054 38.907, -77.054 38.907, -77.055 38.907, -77.055 38.907, -...>,
 <POLYGON ((-76.987 38.901, -76.987 38.901, -76.987 38.902, -76.987 38.902, -...>,
 <POLYGON ((-77.024 38.903, -77.024 38.903, -77.024 38.903, -77.025 38.903, -...>,
 <POLYGON ((-76.947 38.898, -76.949 38.898, -76.949 38.898, -76.949 38.898, -...>,
 <POLYGON ((-77.005 38.89, -77.006 38.89, -77.006 38.89, -77.009 38.89, -77.0...>,
 <POLYGON ((-76.936 38.89, -76.936 38.89, -76.936 38.89, -76.936 38.89, -76.9...>,
 <POLYGON ((-77.01 38.888, -77.011 38.888, -77.011 38.888, -77.011 38.888, -7...>,
 <POLYGON ((-76.965 38.887, -76.965 38.887, -76.965 38.886, -76.965 38.886, -...>,
 <POLYGON ((-76.972 38.873, -76.972 38.873, -76.972 38.873, -76.973 38.873, -...>,
 <POLYGON ((-76.993 38.858, -76.993 38.859, -76.993 38.859, -76.993 38.859, -...>,
 <POLYGON ((-76.973 38.863, -76.973 38.863, -76.974 38.863, -76.974 38.864, -...>,
 <POLYGON ((-76.975 38.853, -76.975 38.853, -76.975 38.853, -76.975 38.853, -...>,
 <POLYGON ((-77.022 38.841, -77.022 38.841, -77.022 38.841, -77.033 38.844, -...>,
 <POLYGON ((-77.072 38.951, -77.072 38.951, -77.072 38.951, -77.072 38.951, -...>,
 <POLYGON ((-76.971 38.938, -76.971 38.938, -76.971 38.938, -76.971 38.938, -...>,
 <POLYGON ((-76.937 38.906, -76.937 38.906, -76.937 38.906, -76.936 38.906, -...>]
Length: 40, dtype: geometry
In [ ]:
unique_names = parking_zones_gdf['NAME'].nunique()
print(f"Number of parking zones in 'NAME': {unique_names}")

unique_names = parking_zones_gdf['RPP_ZONE'].nunique()
print(f"Number of RPP ZONE: {unique_names}")

unique_names = parking_zones_gdf['ANC_ID'].nunique()
print(f"Number of ANC_ID: {unique_names}")

unique_names = parking_zones_gdf['geometry'].nunique()
print(f"Number of geometry: {unique_names}")
Number of parking zones in 'NAME': 40
Number of RPP ZONE: 8
Number of ANC_ID: 40
Number of geometry: 40
In [ ]:
center = parking_zones_gdf.geometry.centroid.unary_union.centroid
map_center = [center.y, center.x]
"""
m = folium.Map(location=map_center, zoom_start=12)
GeoJson(parking_zones_gdf).add_to(m)
GeoJson(
    parking_zones_gdf,
    tooltip=folium.GeoJsonTooltip(fields=["NAME", "RPP_ZONE"])
).add_to(m)

m
"""

image_path = r'C:\Users\ASUS\OneDrive\Desktop\113.png'
image = Image.open(image_path)
display(image)
C:\Users\ASUS\AppData\Local\Temp\ipykernel_2628\2410221009.py:2: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.

  center = parking_zones_gdf.geometry.centroid.unary_union.centroid
C:\Users\ASUS\AppData\Local\Temp\ipykernel_2628\2410221009.py:2: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead.
  center = parking_zones_gdf.geometry.centroid.unary_union.centroid
No description has been provided for this image
In [ ]:
locations_df['geometry'] = locations_df.apply(lambda row: Point(row['LONGITUDE'], row['LATITUDE']), axis=1)
locations_gdf = gpd.GeoDataFrame(locations_df, geometry='geometry', crs='EPSG:4326')
In [ ]:
arking_zones_gdf = parking_zones_gdf.to_crs(epsg=4326)
locations_gdf = locations_gdf.to_crs(epsg=4326)
In [ ]:
stations_with_parking_zone = gpd.sjoin(
    locations_gdf,
    parking_zones_gdf[['NAME', 'geometry']].copy(),
    how='left',
    predicate='within'
)
In [ ]:
stations_with_parking_zone.rename(columns={'NAME_right': 'ZONE_NAME'}, inplace=True)
stations_with_parking_zone['ZONE_NAME'].value_counts()
Out[ ]:
ZONE_NAME
ANC 2A    34
ANC 6D    31
ANC 2C    28
ANC 2B    24
ANC 6C    21
ANC 1B    18
ANC 5E    17
ANC 6B    16
ANC 2F    13
ANC 6A    12
ANC 5D    11
ANC 8A    11
ANC 1C    11
ANC 2E    11
ANC 1A    10
ANC 6E     9
ANC 3D     9
ANC 3C     9
ANC 4C     8
ANC 5B     8
ANC 5C     8
ANC 7D     8
ANC 7B     7
ANC 4B     6
ANC 5A     6
ANC 3E     6
ANC 4A     5
ANC 8D     5
ANC 7C     5
ANC 4D     5
ANC 3F     4
ANC 8B     4
ANC 8E     4
ANC 7E     4
ANC 7F     4
ANC 3B     3
ANC 8C     3
ANC 3G     3
ANC 1D     2
Name: count, dtype: int64
In [ ]:
stations_with_parking_zone['ZONE_NAME'].isna().sum()
Out[ ]:
391
In [ ]:
outside_parking_zones = stations_with_parking_zone[stations_with_parking_zone['ZONE_NAME'].isna()]
outside_parking_zones[['NAME_left', 'LATITUDE', 'LONGITUDE', 'STATION_STATUS']]
Out[ ]:
NAME_left LATITUDE LONGITUDE STATION_STATUS
1 W&OD Trail/Sunset Hills Rd & Isaac Newton Sq 38.951419 -77.340281 active
5 Roosevelt Center & Crescent Rd 39.000310 -76.878005 active
6 Braddock Rd Metro North 38.814577 -77.052808 active
8 S Randolph St & Campbell Ave 38.840654 -77.088659 active
9 Crescent Rd & Ridge Rd 39.006191 -76.891247 active
... ... ... ... ...
787 King St & Peyton St 38.806002 -77.055911 active
789 Oxon Hill Park & Ride 38.798749 -77.001030 active
790 Fleet St & Ritchie Pkwy 39.076331 -77.141378 active
792 Briggs Chaney Park & Ride 39.077875 -76.941867 active
793 Old Meadow Rd & Cathy Ln 38.916703 -77.214448 active

391 rows × 4 columns

In [ ]:
outside_parking_zones['STATION_STATUS'].value_counts()
Out[ ]:
STATION_STATUS
active            390
out_of_service      1
Name: count, dtype: int64
In [ ]:
(outside_parking_zones['NUM_BIKES_AVAILABLE'] == 0).sum()
Out[ ]:
38
In [ ]:
"""
dc_boundary = dc_boundary.to_crs(epsg=4326)
map_center = outside_parking_zones.geometry.unary_union.centroi



m = folium.Map(location=[map_center.y, map_center.x], zoom_start=12)
tooltip_fields = [col for col in dc_boundary.columns if col != 'geometry']
folium.GeoJson(
    dc_boundary,
    name="DC Boundary",
    style_function=lambda feature: {
        'fillColor': 'none',
        'color': 'blue',
        'weight': 2,
        'dashArray': '5, 5'
    },
    tooltip=folium.GeoJsonTooltip(fields=tooltip_fields)
).add_to(m)
for _, row in outside_parking_zones.iterrows():
    color = 'red' if row['STATION_STATUS'] == 'out_of_service' else 'green'
    folium.CircleMarker(
        location=[row['LATITUDE'], row['LONGITUDE']],
        radius=5,
        color=color,
        fill=True,
        fill_opacity=0.7,
        popup=folium.Popup(f"
            <b>{row['NAME_left']}</b><br>
            Status: {row['STATION_STATUS']}<br>
            No Zone
        ", max_width=250)
    ).add_to(m)

m
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\123.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image
In [ ]:
stations_with_parking_zone.groupby(['ZONE_NAME', 'STATION_TYPE']).size().unstack(fill_value=0)
Out[ ]:
STATION_TYPE classic lightweight
ZONE_NAME
ANC 1A 10 0
ANC 1B 18 0
ANC 1C 9 2
ANC 1D 2 0
ANC 2A 28 6
ANC 2B 24 0
ANC 2C 25 3
ANC 2E 10 1
ANC 2F 13 0
ANC 3B 3 0
ANC 3C 9 0
ANC 3D 9 0
ANC 3E 6 0
ANC 3F 4 0
ANC 3G 3 0
ANC 4A 5 0
ANC 4B 6 0
ANC 4C 7 1
ANC 4D 5 0
ANC 5A 6 0
ANC 5B 8 0
ANC 5C 8 0
ANC 5D 11 0
ANC 5E 17 0
ANC 6A 11 1
ANC 6B 14 2
ANC 6C 21 0
ANC 6D 28 3
ANC 6E 9 0
ANC 7B 7 0
ANC 7C 5 0
ANC 7D 8 0
ANC 7E 4 0
ANC 7F 4 0
ANC 8A 10 1
ANC 8B 4 0
ANC 8C 3 0
ANC 8D 5 0
ANC 8E 4 0
In [ ]:
outside_parking_zones['STATION_TYPE'].value_counts()
Out[ ]:
STATION_TYPE
classic        378
lightweight     13
Name: count, dtype: int64
In [ ]:
stations_with_parking_zone.groupby(['ZONE_NAME', 'STATION_STATUS']).size().unstack(fill_value=0)
Out[ ]:
STATION_STATUS active out_of_service
ZONE_NAME
ANC 1A 10 0
ANC 1B 18 0
ANC 1C 11 0
ANC 1D 2 0
ANC 2A 34 0
ANC 2B 24 0
ANC 2C 28 0
ANC 2E 11 0
ANC 2F 13 0
ANC 3B 3 0
ANC 3C 9 0
ANC 3D 9 0
ANC 3E 6 0
ANC 3F 4 0
ANC 3G 3 0
ANC 4A 5 0
ANC 4B 6 0
ANC 4C 8 0
ANC 4D 5 0
ANC 5A 6 0
ANC 5B 7 1
ANC 5C 8 0
ANC 5D 11 0
ANC 5E 17 0
ANC 6A 12 0
ANC 6B 15 1
ANC 6C 20 1
ANC 6D 31 0
ANC 6E 9 0
ANC 7B 7 0
ANC 7C 5 0
ANC 7D 8 0
ANC 7E 4 0
ANC 7F 4 0
ANC 8A 11 0
ANC 8B 4 0
ANC 8C 3 0
ANC 8D 5 0
ANC 8E 4 0
In [ ]:
all_parking_zone_names = set(parking_zones_gdf['NAME'].unique())
zones_with_bikes = set(stations_with_parking_zone['ZONE_NAME'].dropna().unique())
zones_without_bikes = all_parking_zone_names - zones_with_bikes
print("zones_without_bikes:")
print(zones_without_bikes)
zones_without_bikes:
{'ANC 2D'}
In [ ]:
# dc_boundary = dc_boundary.to_crs(epsg=4326)
# center = dc_boundary.geometry.unary_union.centroid
# m = folium.Map(location=[center.y, center.x], zoom_start=12)
"""
tooltip_fields = [col for col in dc_boundary.columns if col != 'geometry']
folium.GeoJson(
    dc_boundary,
    name="DC Boundary",
    style_function=lambda feature: {
        'fillColor': 'none',
        'color': 'blue',
        'weight': 2,
        'dashArray': '5, 5'
    },
    tooltip=folium.GeoJsonTooltip(fields=tooltip_fields)
).add_to(m)
stations_in_zones = stations_with_parking_zone[stations_with_parking_zone['ZONE_NAME'].notna()]
for _, row in stations_in_zones.iterrows():
    color = 'red' if row['STATION_STATUS'] == 'out_of_service' else 'green'
    folium.CircleMarker(
        location=[row['LATITUDE'], row['LONGITUDE']],
        radius=5,
        color=color,
        fill=True,
        fill_opacity=0.7,
        popup=folium.Popup(f""
            <b>{row['NAME_left']}</b><br>
            Status: {row['STATION_STATUS']}<br>
            Zone: {row['ZONE_NAME']}
        "", max_width=250)
    ).add_to(m)

m
"""

image_path = r"C:\Users\asus\Desktop\photo_2025-06-22_01-05-41.jpg"
image = Image.open(image_path)
display(image)
No description has been provided for this image
In [ ]:
map_center = cbd_gdf.geometry.unary_union.centroid
m = folium.Map(location=[map_center.y, map_center.x], zoom_start=13)
"""
folium.GeoJson(
    cbd_gdf,
    name="CBD",
    style_function=lambda feature: {
        'fillColor': 'red',
        'color': 'darkred',
        'weight': 2,
        'fillOpacity': 0.2
    },
    tooltip="Central Business District (ممنوع ركوب الدراجة على الأرصفة)"
).add_to(m)


folium.GeoJson(
    parking_zones_gdf,
    name="Parking Zones",
    style_function=lambda feature: {
        'fillColor': 'blue',
        'color': 'blue',
        'weight': 1,
        'fillOpacity': 0.1
    },
    tooltip=folium.GeoJsonTooltip(fields=['NAME'])
).add_to(m)

folium.LayerControl().add_to(m)
m
"""

image_path = r'C:\Users\ASUS\OneDrive\Desktop\129.png'
image = Image.open(image_path)
display(image)
C:\Users\ASUS\AppData\Local\Temp\ipykernel_14696\3232377439.py:1: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead.
  map_center = cbd_gdf.geometry.unary_union.centroid
No description has been provided for this image
In [ ]:
map_center = cbd_gdf.geometry.unary_union.centroid
"""
m = folium.Map(location=[map_center.y, map_center.x], zoom_start=13)
folium.GeoJson(
    cbd_gdf,
    name="CBD",
    style_function=lambda feature: {
        'fillColor': 'red',
        'color': 'darkred',
        'weight': 2,
        'fillOpacity': 0.2
    },
    tooltip=" Central Business District - لا يسمح بركوب الدراجة على الأرصفة"
).add_to(m)

folium.GeoJson(
    parking_zones_gdf,
    name="Parking Zones",
    style_function=lambda feature: {
        'fillColor': 'blue',
        'color': 'blue',
        'weight': 1,
        'fillOpacity': 0.1
    },
    tooltip=folium.GeoJsonTooltip(fields=['NAME'])
).add_to(m)

for _, row in stations_with_parking_zone.iterrows():
    color = 'red' if row['STATION_STATUS'] == 'out_of_service' else 'green'
    popup_text = f""
        <b>{row['NAME_left']}</b><br>
        Status: {row['STATION_STATUS']}<br>
        Zone: {row['ZONE_NAME'] if pd.notna(row['ZONE_NAME']) else 'No Zone'}
    ""
    folium.CircleMarker(
        location=[row['LATITUDE'], row['LONGITUDE']],
        radius=5,
        color=color,
        fill=True,
        fill_opacity=0.7,
        popup=folium.Popup(popup_text, max_width=250)
    ).add_to(m)

folium.LayerControl().add_to(m)

m
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\130.png'
image = Image.open(image_path)
display(image)
C:\Users\ASUS\AppData\Local\Temp\ipykernel_2628\3368861370.py:1: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead.
  map_center = cbd_gdf.geometry.unary_union.centroid
No description has been provided for this image
In [ ]:
parking_zones_in_cbd = parking_zones_gdf[parking_zones_gdf.intersects(cbd_gdf.unary_union)].copy()
C:\Users\asus\AppData\Local\Temp\ipykernel_12476\2778132266.py:3: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead.
  parking_zones_in_cbd = parking_zones_gdf[parking_zones_gdf.intersects(cbd_gdf.unary_union)].copy()
In [ ]:
print(f"parking zones in cbd: {len(parking_zones_in_cbd)}")
parking zones in cbd: 40
In [98]:
parking_zones_in_cbd.head()
Out[98]:
NAME RPP_ZONE ANC_ID geometry
0 ANC 4A 4 4A POLYGON ((-77.03331 38.98999, -77.03332 38.99,...
1 ANC 3G 3 3G POLYGON ((-77.05161 38.98615, -77.05181 38.986...
2 ANC 4B 4 4B POLYGON ((-76.99908 38.96328, -76.99909 38.963...
3 ANC 3F 3 3F POLYGON ((-77.04702 38.96038, -77.04722 38.960...
4 ANC 4C 4 4C POLYGON ((-77.02803 38.95622, -77.02804 38.956...

الطلب الاول : b.من خام الى مصقول b.1 b.2

In [ ]:
def add_time_features_fast(df):
    started = df['started_at']
    ended = df['ended_at']
    
    start_year = started.dt.year
    end_year = ended.dt.year

    start_month = started.dt.month
    end_month = ended.dt.month

    start_day = started.dt.day
    end_day = ended.dt.day

    start_day_name = started.dt.day_name()
    end_day_name = ended.dt.day_name()
    start_hour_24 = started.dt.hour
    start_hour_12 = ((start_hour_24 + 11) % 12) + 1
    start_am_pm = np.where(start_hour_24 < 12, 'AM', 'PM')
    start_hour_am_pm = start_hour_12.astype(str) + ' ' + start_am_pm

    end_hour_24 = ended.dt.hour
    end_hour_12 = ((end_hour_24 + 11) % 12) + 1
    end_am_pm = np.where(end_hour_24 < 12, 'AM', 'PM')
    end_hour_am_pm = end_hour_12.astype(str) + ' ' + end_am_pm

    ride_duration_min = ((ended - started).dt.total_seconds() / 60).round(1)

    return df.assign(
        start_year=start_year,
        end_year=end_year,
        start_month=start_month,
        end_month=end_month,
        start_day=start_day,
        end_day=end_day,
        start_day_name=start_day_name,
        end_day_name=end_day_name,
        start_hour_am_pm=start_hour_am_pm,
        end_hour_am_pm=end_hour_am_pm,
        ride_duration_min=ride_duration_min
    )

b.من خام الى مصقول b.3 b.6

In [ ]:
def add_business_area_flag_fast(df, df_station):
    business_dict = df_station.set_index('name')['inside_Business'].to_dict()

    start_inside = df['start_station_name'].map(business_dict)
    end_inside = df['end_station_name'].map(business_dict)
    

    trip_in_business_area = (start_inside.fillna(False) | end_inside.fillna(False))
    df['trip_in_business_area'] = trip_in_business_area
    
    return df

def calculate_costs_fast(df):
    member_mask = df['member_casual'] == 'member'
    casual_mask = df['member_casual'] == 'casual'
    
    df['base_cost'] = 0.0
    df.loc[member_mask, 'base_cost'] = 3.95
    df.loc[casual_mask, 'base_cost'] = 1.00
    
 
    df['extra_minutes'] = 0
    df.loc[member_mask, 'extra_minutes'] = np.maximum(df.loc[member_mask, 'ride_duration_min'] - 45, 0)
    
    df['extra_time_fee'] = 0.0
    df.loc[member_mask & (df['rideable_type'] == 'classic_bike'), 'extra_time_fee'] = df['extra_minutes'] * 0.05
    df.loc[member_mask & (df['rideable_type'] == 'electric_bike'), 'extra_time_fee'] = df['extra_minutes'] * 0.1
    
    df['extra_time_fee_casual'] = 0.0
    df.loc[casual_mask & (df['rideable_type'] == 'classic_bike'), 'extra_time_fee_casual'] = df.loc[casual_mask, 'ride_duration_min'] * 0.05
    df.loc[casual_mask & (df['rideable_type'] == 'electric_bike'), 'extra_time_fee_casual'] = df.loc[casual_mask, 'ride_duration_min'] * 0.15
    
    df['extra_time_fee'] = df['extra_time_fee'] + df['extra_time_fee_casual']
    
    df['long_trip_fee'] = np.where(df['ride_duration_min'] > 1440, 3.0, 0.0)
    
    df['business_area_fee'] = np.where(df['trip_in_business_area'], 0.5, 0.0)
    
    df['total_cost'] = df['base_cost'] + df['extra_time_fee'] + df['long_trip_fee'] + df['business_area_fee']
    
    df['total_cost'] = df['total_cost'].round(2)
    
    df.drop(columns=['extra_minutes', 'extra_time_fee_casual'], inplace=True)
    
    return df
In [101]:
df = (df
      .pipe(add_time_features_fast)
      .pipe(add_business_area_flag_fast, df_station=df_station)
      .pipe(calculate_costs_fast)

     )
In [102]:
print(df_station.columns)
Index(['id', 'name', 'lat', 'lng', 'inside_dc', 'inside_Business'], dtype='object')
In [103]:
print(f"عدد الصفوف التي مدة الرحلة فيها <= 0 دقيقة: {(df['ride_duration_min'] <= 0).sum()}")

df = df[df['ride_duration_min'] > 0]


print(f"عدد الصفوف بعد التنظيف: {df.shape[0]}")
عدد الصفوف التي مدة الرحلة فيها <= 0 دقيقة: 10537
عدد الصفوف بعد التنظيف: 6103742
In [ ]:
bins = [0, 15, 25, locations_df['CAPACITY'].max()]
labels = ['Small' , 'Medium', 'Large']

locations_df['CAPACITY_BIN'] = pd.cut(locations_df['CAPACITY'], bins=bins, labels=labels, include_lowest=True)
In [105]:
locations_df['CAPACITY_BIN'].value_counts()
Out[105]:
CAPACITY_BIN
Small     417
Medium    327
Large      50
Name: count, dtype: int64

small¶

اكثر من 400

هي الأكبر من حيث عدد المحطات

المحطات الصغيرة متوزعة بشكل كبير

medium¶

عدد المحطات تقريبا بين 300 و 350

ممكن انو تكون هل محطات بمناطق مو معجوقة كتير

large¶

اقل من 100 محطة كبيرة

ممكن انو تكون باماكن معينة جدا

مثل محطات مركزية او مناطق عالية الطلب

In [115]:
capacity_counts = locations_df['CAPACITY_BIN'].value_counts().reset_index()
capacity_counts.columns = ['Capacity_Bin', 'Count']
"""
fig = px.bar(
    capacity_counts,
    x='Capacity_Bin',
    y='Count',
    labels={'Capacity_Bin': 'Capacity Category', 'Count': 'Number of Stations'},
    title='Number of Stations by Capacity Category',
    color_discrete_sequence=['#636EFA']
)

fig.update_layout(template='plotly_white')
fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\146.png'

image = Image.open(image_path)
display(image)
No description has been provided for this image
In [ ]:
def add_nearest_distance_and_id(df, ref_df, ref_lat_col='LATITUDE', ref_lng_col='LONGITUDE', ref_id_col='ID',
                                start_lat_col='start_lat_y', start_lng_col='start_lng_y',
                                end_lat_col='end_lat_y', end_lng_col='end_lng_y',
                                start_output_col='start_distance_to_ref_m',
                                end_output_col='end_distance_to_ref_m',
                                start_id_output_col='start_nearest_ref_id',
                                end_id_output_col='end_nearest_ref_id'):
   
    ref_coords = np.vstack((ref_df[ref_lng_col].values, ref_df[ref_lat_col].values)).T
    ref_tree = cKDTree(ref_coords)
    
    valid_start_mask = df[start_lat_col].notna() & df[start_lng_col].notna()
    valid_end_mask = df[end_lat_col].notna() & df[end_lng_col].notna()
    
    start_coords = np.vstack((
        df.loc[valid_start_mask, start_lng_col].values,
        df.loc[valid_start_mask, start_lat_col].values
    )).T
    
    end_coords = np.vstack((
        df.loc[valid_end_mask, end_lng_col].values,
        df.loc[valid_end_mask, end_lat_col].values
    )).T
    
    dist_start, idx_start = ref_tree.query(start_coords, k=1)
    dist_end, idx_end = ref_tree.query(end_coords, k=1)
    
    factor_to_meters = 111320
    
    df[start_output_col] = np.nan
    df[end_output_col] = np.nan
    
    df.loc[valid_start_mask, start_output_col] = dist_start * factor_to_meters
    df.loc[valid_end_mask, end_output_col] = dist_end * factor_to_meters
    
    df[start_id_output_col] = np.nan
    df[end_id_output_col] = np.nan
    
    nearest_start_ids = ref_df.iloc[idx_start][ref_id_col].values
    nearest_end_ids = ref_df.iloc[idx_end][ref_id_col].values
    
    df.loc[valid_start_mask, start_id_output_col] = nearest_start_ids
    df.loc[valid_end_mask, end_id_output_col] = nearest_end_ids
    
    return df
In [107]:
df = add_nearest_distance_and_id(df, shuttle_df,
                                 ref_lat_col='LATITUDE', ref_lng_col='LONGITUDE', ref_id_col='ID',
                                 start_output_col='start_distance_to_shuttle_m',
                                 end_output_col='end_distance_to_shuttle_m',
                                 start_id_output_col='start_nearest_shuttle_id',
                                 end_id_output_col='end_nearest_shuttle_id')
In [108]:
df = add_nearest_distance_and_id(df, metro_df,
                                 ref_lat_col='LATITUDE', ref_lng_col='LONGITUDE', ref_id_col='ID',
                                 start_output_col='start_distance_to_metro_m',
                                 end_output_col='end_distance_to_metro_m',
                                 start_id_output_col='start_nearest_metro_id',
                                 end_id_output_col='end_nearest_metro_id')
In [109]:
lat_min = df['end_distance_to_shuttle_m'].min()
lat_max =df['end_distance_to_shuttle_m'].max()
lng_min = df['start_distance_to_shuttle_m'].min()
lng_max = df['start_distance_to_shuttle_m'].max()

print(" نطاق    البعد عن الموقف حافلة:")
print(f": من {lat_min} إلى {lat_max}")
print(f": من {lng_min} إلى {lng_max}")
 نطاق    البعد عن الموقف حافلة:
: من 6.698338510186922 إلى 30531.86919690506
: من 6.698338510186922 إلى 30531.86919690506
In [117]:
lat_min = df['end_distance_to_metro_m'].min()
lat_max =df['end_distance_to_metro_m'].max()
lng_min = df['start_distance_to_metro_m'].min()
lng_max = df['start_distance_to_metro_m'].max()

print(" نطاق البعد عن موقف المترو:")
print(f": من {lat_min} إلى {lat_max}")
print(f": من {lng_min} إلى {lng_max}")
 نطاق البعد عن موقف المترو:
: من 1.266184083096535 إلى 4373.904399448221
: من 1.266184083096535 إلى 4373.904399448221

b.7

In [ ]:
def prepare_stations(df_station, business_boundary):
    stations = df_station[['name', 'lat', 'lng', 'inside_Business']].copy()
    stations = stations.rename(columns={'name': 'end_station_name'})
    stations['geometry'] = stations.apply(lambda row: Point(row['lng'], row['lat']), axis=1)
    gdf_stations = gpd.GeoDataFrame(stations, geometry='geometry', crs='EPSG:4326')

    gdf_stations = gdf_stations.to_crs(business_boundary.crs)

    gdf_stations['distance_to_business_area'] = gdf_stations.geometry.apply(lambda x: business_boundary.distance(x).min())
    threshold = 500
    gdf_stations['close_to_business_area'] = gdf_stations['distance_to_business_area'] <= threshold

    return gdf_stations[['end_station_name', 'distance_to_business_area', 'close_to_business_area', 'inside_Business']]
def add_distance_flag(df, gdf_stations, df_station):
    start_inside_dict = df_station.set_index('name')['inside_Business'].to_dict()
  
    end_inside_dict = start_inside_dict 
    df = df.merge(gdf_stations[['end_station_name', 'distance_to_business_area', 'close_to_business_area']],
                  on='end_station_name', how='left')

    start_inside = df['start_station_name'].map(start_inside_dict).fillna(False)
    end_inside = df['end_station_name'].map(end_inside_dict).fillna(False)
    mask = start_inside & end_inside
    df.loc[mask, ['distance_to_business_area', 'close_to_business_area']] = np.nan

    return df

gdf_stations_prepared = prepare_stations(df_station, Business_boundary)

df = df.pipe(add_distance_flag, gdf_stations=gdf_stations_prepared, df_station=df_station)
C:\Users\asus\AppData\Local\Temp\ipykernel_12476\257399541.py:12: UserWarning: Geometry is in a geographic CRS. Results from 'distance' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.

  gdf_stations['distance_to_business_area'] = gdf_stations.geometry.apply(lambda x: business_boundary.distance(x).min())

b.8

In [ ]:
coords = df_station[['lat', 'lng']].values
tree = cKDTree(coords)

distances, indices = tree.query(coords, k=2)
nearest_distances = distances[:, 1]

nearest_distances_km = nearest_distances * 111 

print("أدنى مسافة بين محطات (كم):", nearest_distances_km.min())
print("متوسط المسافة بين المحطات (كم):", nearest_distances_km.mean())
print("أعلى مسافة بين أقرب محطات (كم):", nearest_distances_km.max())
أدنى مسافة بين محطات (كم): 0.0003214075879907089
متوسط المسافة بين المحطات (كم): 0.43477205898036936
أعلى مسافة بين أقرب محطات (كم): 3.479772189213669
In [129]:
df_station.drop(columns='geohash', inplace=True)
df.drop(columns='start_geohash', inplace=True)
df.drop(columns='end_geohash', inplace=True)
In [131]:
for i, col in enumerate(df.columns):
    print(f"{i}: {col}")
0: ride_id
1: rideable_type
2: started_at
3: ended_at
4: start_station_name
5: start_station_id
6: end_station_name
7: end_station_id
8: start_lat_x
9: start_lng_x
10: end_lat_x
11: end_lng_x
12: member_casual
13: start_lat_y
14: start_lng_y
15: end_lat_y
16: end_lng_y
17: start_year
18: end_year
19: start_month
20: end_month
21: start_day
22: end_day
23: start_day_name
24: end_day_name
25: start_hour_am_pm
26: end_hour_am_pm
27: ride_duration_min
28: trip_in_business_area
29: base_cost
30: extra_time_fee
31: long_trip_fee
32: business_area_fee
33: total_cost
34: start_distance_to_shuttle_m
35: end_distance_to_shuttle_m
36: start_nearest_shuttle_id
37: end_nearest_shuttle_id
38: start_distance_to_metro_m
39: end_distance_to_metro_m
40: start_nearest_metro_id
41: end_nearest_metro_id
42: distance_to_business_area_x
43: close_to_business_area_x
44: distance_to_business_area_y
45: close_to_business_area_y
46: distance_to_business_area
47: close_to_business_area
In [132]:
df_station['geohash'] = df_station.apply(lambda row: geohash.encode(row['lat'], row['lng'], precision=7), axis=1)
In [ ]:
df = df.merge(df_station[['name', 'geohash']], how='left', left_on='start_station_name', right_on='name')
df.rename(columns={'geohash': 'start_geohash'}, inplace=True)
df.drop(columns=['name'], inplace=True)
df = df.merge(df_station[['name', 'geohash']], how='left', left_on='end_station_name', right_on='name')
df.rename(columns={'geohash': 'end_geohash'}, inplace=True)
df.drop(columns=['name'], inplace=True)
In [ ]:
start_counts = df['start_geohash'].value_counts().reset_index()
start_counts.columns = ['geohash', 'start_count']

end_counts = df['end_geohash'].value_counts().reset_index()
end_counts.columns = ['geohash', 'end_count']
counts = start_counts.merge(end_counts, on='geohash', how='outer').fillna(0)

b.9

In [ ]:
df['start_date'] = pd.to_datetime(df['started_at']).dt.date
start_counts = df.groupby(['start_geohash', 'start_date']).size().reset_index(name='count')

end_counts = df.groupby(['end_geohash', 'start_date']).size().reset_index(name='count')

start_counts.rename(columns={'start_geohash': 'geohash'}, inplace=True)
end_counts.rename(columns={'end_geohash': 'geohash'}, inplace=True)
all_counts = pd.concat([start_counts, end_counts])

daily_counts = all_counts.groupby(['geohash', 'start_date'])['count'].sum().reset_index()
In [136]:
mean_daily = daily_counts.groupby('geohash')['count'].mean().reset_index()
mean_daily.rename(columns={'count': 'mean_daily_usage'}, inplace=True)
In [137]:
max_usage = mean_daily['mean_daily_usage'].max()

def classify_usage(x):
    pct = (x / max_usage) * 100
    if pct > 70:
        return 'أحمر'
    elif pct > 50:
        return 'أصفر'
    elif pct <= 50:
        return 'رمادي'


mean_daily['category'] = mean_daily['mean_daily_usage'].apply(classify_usage)
In [138]:
mean_daily['category'].value_counts()
Out[138]:
category
رمادي    776
أصفر      16
أحمر       8
Name: count, dtype: int64
In [ ]:
  def map_conditions(cond):
    cond = cond.lower()
    if 'rain' in cond or 'snow' in cond:
        return 'Rainy'
    elif 'overcast' in cond:
        return 'Cloudy'
    elif 'clear' in cond or 'partially cloudy' in cond:
        return 'Sunny'


weather_df['weather_bin'] = weather_df['conditions'].apply(map_conditions)
In [ ]:
weather_df['weather_bin'].value_counts()
Out[ ]:
weather_bin
Sunny     221
Rainy     139
Cloudy      6
Name: count, dtype: int64

Sunny¶

عدد الايام المشمسة تقريبا 220 يوم

يعني اغلب الايام مشمسة

Rainy¶

عدد الايام الماطرة حوالي 140 يوم

Cloudy¶

عدد الايام الغائمة قليل كتير

In [132]:
weather_counts = weather_df['weather_bin'].value_counts().reset_index()
weather_counts.columns = ['Weather', 'Count']
"""
fig = px.bar(
    weather_counts,
    x='Weather',
    y='Count',
    title='Weather Conditions Distribution',
    color='Weather',
    color_discrete_sequence=px.colors.qualitative.Set2
)

fig.update_layout(template='plotly_white')
fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\170.png'

image = Image.open(image_path)
display(image)
No description has been provided for this image
In [ ]:
df['end_date'] = df['ended_at'].dt.date
daily_revenue = (
    df.groupby('end_date')['total_cost']
    .sum()
    .reset_index()
    .rename(columns={'end_date': 'date', 'total_cost': 'daily_revenue'})
)
weather_df['date'] = pd.to_datetime(weather_df['datetime']).dt.date
daily_revenue_weather = pd.merge(
    daily_revenue,
    weather_df[['date', 'weather_bin']], 
    on='date',
    how='left'
)

print(daily_revenue_weather.head())
         date  daily_revenue weather_bin
0  2024-01-01       18181.08       Rainy
1  2024-01-02       32445.09       Sunny
2  2024-01-03       34860.97       Sunny
3  2024-01-04       33082.30       Sunny
4  2024-01-05       31989.71       Sunny
In [ ]:
"""
fig = px.line(
    daily_revenue_weather,
    x='date',
    y='daily_revenue',
    color='weather_bin',  # يلون حسب حالة الطقس
    title='Daily Total Revenue with Weather Condition',
    labels={
        'date': 'Date',
        'daily_revenue': 'Total Revenue',
        'weather_bin': 'Weather Condition'
    }
)

# تحسين المظهر
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Total Revenue',
    hovermode='x unified'
)

fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\181.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

• الإيرادات في الأيام المشمسة (الأحمر) عادة ما تكون أعلى وأكثر استقراراً على مدار السنة

• الإيرادات في الأيام الممطرة (الأزرق) متقلبة أكثر، لكن تميل للارتفاع في بعض الفترات مثل الربيع والصيف، ثم تنخفض نحو نهاية السنة

• الإيرادات في الأيام الغائمة (الأخضر) أقل بكثير مقارنة بالأيام المشمسة والممطرة، ولها ميل هبوطي تدريجي عبر السنة

• يبدو أن الطقس يؤثر بشكل واضح على الإيرادات اليومية مع تفضيل أكبر للأيام المشمسة للحصول على دخل أعلى

In [135]:
df_station.to_csv(r'C:\Users\ASUS\OneDrive\Desktop\stationsc.csv', index=False)
df.to_parquet(r'C:\Users\ASUS\OneDrive\Desktop\daily-rentc.parquet', index=False)
metro_df.to_csv(r'C:\Users\ASUS\OneDrive\Desktop\Metro_Bus_Stopsc.csv', index=False)

shuttle_df.to_csv(r'C:\Users\ASUS\OneDrive\Desktop\Shuttle_Bus_Stopsc.csv', index=False)
weather_df.to_csv(r'C:\Users\ASUS\OneDrive\Desktop\Washington,DC,USA 2024-01-01 to 2024-12-31c.csv', index=False)
locations_df.to_csv(r'C:\Users\ASUS\OneDrive\Desktop\Capital_Bikeshare_Locationsc.csv', index=False)
In [ ]:
df_station = pd.read_csv(r'C:\Users\ASUS\OneDrive\Desktop\stationsc.csv')

df = pd.read_parquet(r'C:\Users\ASUS\OneDrive\Desktop\daily-rentc.parquet')


metro_df= pd.read_csv(r'C:\Users\ASUS\OneDrive\Desktop\Metro_Bus_Stopsc.csv')
shuttle_df = pd.read_csv(r'C:\Users\ASUS\OneDrive\Desktop\Shuttle_Bus_Stopsc.csv')

weather_df = pd.read_csv(r'C:\Users\ASUS\OneDrive\Desktop\Washington,DC,USA 2024-01-01 to 2024-12-31c.csv') 

locations_df = pd.read_csv(r'C:\Users\ASUS\OneDrive\Desktop\Capital_Bikeshare_Locationsc.csv')

df['start_station_name'] = df['start_station_name'].replace('nan', np.nan)
df['end_station_name'] = df['end_station_name'].replace('nan', np.nan)

cbd_gdf = gpd.read_file(r'C:\Users\ASUS\Downloads\data-20250530T151946Z-1-001\data\DDOT_Central_Business_District.geojson')
parking_zones_gdf = gpd.read_file(r'C:\Users\ASUS\Downloads\data-20250530T151946Z-1-001\data\Residential_and_Visitor_Parking_Zones.geojson')
dc_boundary = gpd.read_file(r'C:\Users\ASUS\Downloads\Washington_DC_Boundary_Stone_Area.geojson')
Business_boundary =gpd.read_file(r'C:\Users\ASUS\Downloads\data-20250530T151946Z-1-001\data\DDOT_Central_Business_District.geojson')

2_ استكشاف وتحليل

a.دراسة حالة الدراجات والمحطات

a.1

In [ ]:
top5_start_stations = (
    df['start_station_name']
    .value_counts()
    .head(5)
    .reset_index()
    .rename(columns={'index': 'start_station_name', 'count': 'trip_count'})
)

""""
fig = px.bar(
    top5_start_stations,
    x='start_station_name',
    y='trip_count',
    color='start_station_name',
    text='trip_count',
    title='Top 5 محطات الانطلاق الأكثر استخداماً',
    labels={'start_station_name': 'اسم محطة الانطلاق', 'trip_count': 'عدد الرحلات'}
)


fig.update_layout(
    xaxis_title='محطة الانطلاق',
    yaxis_title='عدد الرحلات',
    showlegend=False
)

fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\a1.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

• هذه المحطات هي الأكثر جذباً للمستخدمين

• سواء كبداية للرحلة أو نهايتها

• محطة columbus circle / union station

• هي الأعلى استخدامًا، إذ كان هناك أكثر من 53 ألف رحلة بدأت أو انتهت عندها

• المحطات الأخرى تتراوح أعداد الاستخدام فيها بين حوالي 40 ألف إلى 50 ألف رحلة

لماذا هذا مهم؟

• تحليل شعبية المحطات يساعد مشغلي النظام في

o تحسين الصيانة والتزويد بالدراجات في المحطات الأكثر طلبًا

o تخطيط التوسعات المستقبلية للنظام في مناطق مشابهة

o فهم حركة المستخدمين وتوزيعهم جغرافياً لتحسين جودة الخدمة

• يوفر المخطط :مناطق التركيز والتجمعات الكبيرة التي تنشط بها حركة الدراجات

• يستطيع المسؤولين عن إدارة هذا النظام، تركيز على هذه المحطات للارتقاء بالخدمات

يمكن أيضاً استخدام هذه المحطات لتصميم حملات تسويقية أو إعلانية لانو هي اكثر محطات استخدام

a.2

In [ ]:
grouped_df = (
    df.groupby(['rideable_type', 'member_casual'])
    .size()
    .reset_index(name='trip_count')
)

"""

fig = px.bar(
    grouped_df,
    x='rideable_type',
    y='trip_count',
    color='member_casual',
    barmode='group',  # عرض كل مجموعة جنباً إلى جنب
    text='trip_count',
    title='توزيع عدد الرحلات حسب نوع الدراجة ونوع الاشتراك',
    labels={'rideable_type': 'نوع الدراجة', 'trip_count': 'عدد الرحلات', 'member_casual': 'نوع الاشتراك'}
)

fig.update_layout(
    xaxis_title='نوع الدراجة',
    yaxis_title='عدد الرحلات',
    legend_title='نوع الاشتراك'
)

fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\a2.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

أ. الأعضاء المسجلون

(member) يستخدمون الدراجة الكهربائية أكثر بكثير

• عدد رحلات الأعضاء على الدراجة الكهربائية (2,290,957) يزيد بشكل ملحوظ عن عدد رحلاتهم على الدراجة التقليدية (1,760,891)

• هذا يشير إلى أن الأعضاء يميلون لاستخدام الدراجة الكهربائية أكثر، ربما لأنها أسهل أو أسرع أو أكثر راحة

ب. المستخدمون غير المسجلين (casual) يستخدمون الدراجة الكهربائية أيضاً أكثر قليلاً

• عدد رحلاتهم على الدراجة الكهربائية (1,105,394) أكبر من رحلات الدراجة التقليدية (946,500)، مع فارق أقل مقارنة بالأعضاء

• يدل ذلك على توجه بسيط نحو الدراجة الكهربائية حتى بين المستخدمين العاديين

• استخدام الدراجات الكهربائية يزداد بشكل واضح عند كل من الأعضاء والمستخدمين العاديين

• الأعضاء هم الفئة الأكثر استخداماً للخدمة بشكل عام

• الدراجات الكهربائية تكتسب شعبية أكثر من الدراجات التقليدية لدى جميع المستخدمين، وهذا يمكن أن يشير إلى مزاياها الأفضل بالمقارنة مع الدراجة الكلاسيكية

a.3

In [ ]:
top5_stations = top5_start_stations['start_station_name'].tolist()
df_top5 = df[df['start_station_name'].isin(top5_stations)]

sunburst_df = (
    df_top5.groupby(['start_station_name', 'rideable_type', 'member_casual'])
    .size()
    .reset_index(name='trip_count')
)

"""
fig = px.sunburst(
    sunburst_df,
    path=['start_station_name', 'rideable_type', 'member_casual'],
    values='trip_count',
    color='rideable_type',  # تلوين حسب نوع الدراجة
    title='توزيع الرحلات حسب أفضل 5 محطات، نوع الدراجة، ونوع الاشتراك',
)

fig.update_layout(margin=dict(t=40, l=0, r=0, b=0))
fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\a3.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

  1. 14th & V St NW

الاستنتاج

محطة عالية الاستخدام من قبل الأعضاء، مع تفضيل قوي لـ الدراجات الكهربائية

توصيات

• تعزيز عدد الدراجات الكهربائية في هذه المحطة

• توفير محطات شحن إضافية للدراجات الكهربائية

  1. Columbus Circle / Union Station

الاستنتاج

محطة مركزية بتوزيع متوازن بين الدراجات التقليدية والكهربائية، وتخدم كلا الفئتين (الأعضاء والعاديين)

توصيات

• المحافظة على التوازن بين أنواع الدراجات

• إطلاق مبادرات لزيادة الاشتراكات وتحويل المستخدمين العاديين إلى أعضاء دائمين

  1. New Hampshire Ave & T St NW

الاستنتاج

تميل لاستخدام الدراجات التقليدية، خاصة من قبل المستخدمين العاديين، مع انخراط جيد من الأعضاء

توصيات

• زيادة الدراجات التقليدية لتلبية الطلب

• إطلاق عروض خاصة لتشجيع الأعضاء على استخدام الدراجات الكهربائية

• تقديم خصومات أو مزايا للعضوية لجذب المستخدمين العاديين

  1. 139 B & P St NW

الاستنتاج

محطة مخصصة فعليًا للأعضاء، مع تفضيل قوي للدراجات الكهربائية، واستخدام منخفض من المستخدمين العاديين

توصيات

• تعزيز مرافق الخدمة المخصصة للأعضاء (مثل صيانة فورية، أو أولوية في الحجز)

• زيادة عدد الدراجات الكهربائية

• إطلاق حملات عضوية لاستقطاب فئات جديدة من المستخدمين

  1. 15th & St at None

الاستنتاج

محطة متنوعة في الاستخدام، مع توازن نسبي بين أنواع الدراجات، لكن مع ميول خفيفة نحو الدراجات الكهربائية من الأعضاء

توصيات: • الحفاظ على التوازن الحالي بين الدراجات الكهربائية والتقليدية

a.4

In [139]:
"""
fig = px.histogram(
    locations_df,
    x='CAPACITY',
    nbins=20,
    title='Distribution of Station Capacity',
    labels={'CAPACITY': 'Station Capacity', 'count': 'Number of Stations'}
)

fig.update_layout(
    xaxis_title='Station Capacity',
    yaxis_title='Number of Stations'
)

fig.show()
"""


image_path = r'C:\Users\ASUS\OneDrive\Desktop\a4.png'

image = Image.open(image_path)
display(image)
No description has been provided for this image
In [ ]:
capacity_bin_map = locations_df.set_index('NAME')['CAPACITY_BIN'].to_dict()
df['start_capacity_bin'] = df['start_station_name'].map(capacity_bin_map)
df['end_capacity_bin'] = df['end_station_name'].map(capacity_bin_map)
In [141]:
start_bin_counts = df['start_capacity_bin'].value_counts().reset_index()
start_bin_counts.columns = ['capacity_bin', 'trip_count']
"""
fig = px.bar(
    start_bin_counts,
    x='capacity_bin',
    y='trip_count',
    title='Trip Distribution by Start Station Capacity Bin',
    labels={'capacity_bin': 'Start Station Capacity Bin', 'trip_count': 'Number of Trips'}
)

fig.show()
"""

image_path = r'C:\Users\ASUS\OneDrive\Desktop\a4_2.png'

image = Image.open(image_path)
display(image)
No description has been provided for this image
In [ ]:
end_bin_counts = df['end_capacity_bin'].value_counts().reset_index()
end_bin_counts.columns = ['capacity_bin', 'trip_count']
"""
fig = px.bar(
    end_bin_counts,
    x='capacity_bin',
    y='trip_count',
    title='Trip Distribution by End Station Capacity Bin',
    labels={'capacity_bin': 'End Station Capacity Bin', 'trip_count': 'Number of Trips'}
)

fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\a4_3.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخططات ال 3¶

توزيع سعات المحطات

الهدف من المخطط انو نعرف كيف تتوزع المحطات

حسب سعتها داخل الشبكة بشكل عام

معظم المحطات لديها سعة صغيرة إلى متوسطة في عدد كبير من المحطات تقع ضمن السعات المنخفضة

المحطات ذات السعة الكبيرة قليلة بما معناه انتشار واسع لمحطات ضخمة

b.دراسة مدة الرحلة

b.1

In [ ]:
bins = [-float('inf'), 1, 3, 5, 7, 10, 15, 30, 60, 120, 240, 1440, float('inf')]
labels = [
    'Under 1 minute',
    '1-3 minutes',
    '3-5 minutes',
    '5-7 minutes',
    '7-10 minutes',
    '10-15 minutes',
    '15-30 minutes',
    '30-60 minutes',
    '1-2 hours',
    '2-4 hours',
    '4 hours - 1 day',
    'Over 1 day'
]


df['duration_category'] = pd.cut(df['ride_duration_min'], bins=bins, labels=labels, right=False)

print(df['duration_category'].value_counts())
duration_category
15-30 minutes      1226972
10-15 minutes      1180431
7-10 minutes       1047759
5-7 minutes         803570
3-5 minutes         750383
30-60 minutes       430980
1-3 minutes         405324
1-2 hours           115624
Under 1 minute      101274
2-4 hours            28504
4 hours - 1 day       7893
Over 1 day            5028
Name: count, dtype: int64
In [ ]:
duration_counts = df['duration_category'].value_counts().sort_index()
duration_df = duration_counts.reset_index()
duration_df.columns = ['مدة الرحلة', 'عدد الرحلات']
"""
fig = px.bar(
    duration_df,
    x='مدة الرحلة',
    y='عدد الرحلات',
    text='عدد الرحلات',
    title='توزع عدد الرحلات حسب فئات مدة الرحلة',
    labels={'مدة الرحلة': 'مدة الرحلة', 'عدد الرحلات': 'عدد الرحلات'},
    color='عدد الرحلات',
    color_continuous_scale='Blues'
)

fig.update_traces(texttemplate='%{text:,}', textposition='outside')
fig.update_layout(
    xaxis_title='فئة مدة الرحلة',
    yaxis_title='عدد الرحلات',
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    margin=dict(t=40, b=80),
    coloraxis_showscale=False
)

fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\b1.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

  1. كلما زادت مدة الرحلة، قل عدد المستخدمين بشكل ملحوظ

  2. هذا النوع من التوزيع مفيد جدًا في تحسين الخدمة

o تحسين توزيع المحطات

o مراجعة الرحلات غير الطبيعية

o تصميم خطط أسعار حسب وقت الاستخدام

b.2

In [ ]:
"""
fig = px.box(
    df,
    x='rideable_type',
    y='ride_duration_min',
    title='توزع مدة الرحلة حسب نوع الدراجة',
    labels={
        'rideable_type': 'نوع الدراجة',
        'ride_duration_min': 'مدة الرحلة (دقائق)'
    },
    points='outliers' 
)

fig.show()
"""
"""
sample_df = df.sample(n=100000, random_state=42)
fig = px.box(
sample_df,
   x='rideable_type',
    y='ride_duration_min',
    title='توزع مدة الرحلة حسب نوع الدراجة',
    labels={
        'rideable_type': 'نوع الدراجة',
        'ride_duration_min': 'مدة الرحلة (دقائق)'
    },
    points='outliers' 
)

fig.show()
"""

image_path = r'C:\Users\ASUS\OneDrive\Desktop\b2.png'
image = Image.open(image_path)
display(image)
image_path = r'C:\Users\ASUS\OneDrive\Desktop\b3ss.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image
No description has been provided for this image

بسبب عدد الرحلات الكبير المخطط لم يكن واضح لاستنتاج منه أي معلومة: وتم تجربة اخذ عينة تم ظهور قيم متطرفة بشكل أوضح لكن لايزال سيئ¶

In [ ]:
"""
fig = px.box(
    df,
    x='rideable_type',
    y='ride_duration_min',
    points='outliers',  # عرض النقاط الشاذة
    title='Box Plot of Ride Duration by Rideable Type (Log Scale)',
    labels={
        'rideable_type': 'Rideable Type',
        'ride_duration_min': 'Ride Duration (minutes)'
    }
)

# نحط المحور Y لوغاريتمي
fig.update_yaxes(type='log')

fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\b2log.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image

لذلك تم استخدام مخطط لوغريتمي¶

شرح المحطط¶

• معظم الرحلات سواء على classic_bike أو electric_bike تقع في نفس النطاق تقريبًا (بين ~5 و30 دقيقة)

• يوجد عدد كبير من القيم الشاذة، خاصة في classic_bike

• توزيع electric_bike يبدو أكثر تماسكًا وأقل تنوعًا من classic_bike

b.3

In [147]:
sample_df = df.sample(n=100000, random_state=42)
"""
fig = px.box(
 df,
    x='member_casual',
    y='ride_duration_min',
    title='توزع مدة الرحلة حسب نوع اشتراك',
    labels={
        'member_casual': 'member Type',
        'ride_duration_min': 'مدة الرحلة (دقائق)'
    },
    points='outliers' 
)
fig.show()
"""
"""
fig = px.box(
sample_df,
    x='member_casual',
    y='ride_duration_min',
    title='توزع مدة الرحلة حسب نوع اشتراك',
    labels={
        'member_casual': 'member Type',
        'ride_duration_min': 'مدة الرحلة (دقائق)'
    },
    points='outliers' 
)
fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\b31.png'

image = Image.open(image_path)
display(image)
image_path = r'C:\Users\ASUS\OneDrive\Desktop\b31ss.png'

image = Image.open(image_path)
display(image)
No description has been provided for this image
No description has been provided for this image
In [148]:
"""
fig = px.box(
    df,
    x='member_casual',
    y='ride_duration_min',
    points='outliers',  # عرض النقاط الشاذة
    title='Box Plot of Ride Duration by member Type (Log Scale)',
    labels={
        'member_casual': 'member Type',
        'ride_duration_min': 'Ride Duration (minutes)'
    }
)

# نحط المحور Y لوغاريتمي
fig.update_yaxes(type='log')

fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\b32.png'

image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

تم استخدام اللوغريتمي

  1. مدة الرحلة عند الزوار (casual) أطول بشكل عام من الأعضاء

o الوسيط أعلى

o مدى التوزيع أوسع

o الرحلات الأطول شائعة أكثر

  1. الأعضاء (members) يستخدمون الدراجة لرحلات أقصر ومتكررة غالبًا

o هذا منطقي لأنهم ربما يستخدمونها للتنقل اليومي (للعمل، الجامعة، إلخ)

الزوار (casual) قد يستخدمون الدراجة لأغراض ترفيهية أو سياحية، لذلك تستمر الرحلات لفترة أطول

b.4

In [ ]:
# df_long_trips = df[df['ride_duration_min'] > 1440].copy()
# start_counts = (
#     df_long_trips.groupby('start_station_name')
#     .size()
#     .reset_index(name='start_trip_count')
# )
# end_counts = (
#     df_long_trips.groupby('end_station_name')
#     .size()
#     .reset_index(name='end_trip_count')
# )

# station_counts = pd.merge(
#     start_counts, 
#     end_counts, 
#     left_on='start_station_name', 
#     right_on='end_station_name', 
#     how='outer'
# )

# station_counts['station_name'] = station_counts['start_station_name'].combine_first(station_counts['end_station_name'])
# station_counts['start_trip_count'] = station_counts['start_trip_count'].fillna(0)
# station_counts['end_trip_count'] = station_counts['end_trip_count'].fillna(0)

# station_counts['total_long_trips'] = station_counts['start_trip_count'] + station_counts['end_trip_count']

# stations_geo = df[['start_station_name', 'start_lat_y', 'start_lng_y']].drop_duplicates()
# stations_geo = stations_geo.rename(columns={
#     'start_station_name': 'station_name',
#     'start_lat_y': 'lat',
#     'start_lng_y': 'lng'
# })

# stations_plot = pd.merge(stations_geo, station_counts[['station_name', 'total_long_trips']], on='station_name', how='left')
# stations_plot['total_long_trips'] = stations_plot['total_long_trips'].fillna(0)

# dc_boundary = gpd.read_file(r'C:\Users\ASUS\Downloads\Washington_DC_Boundary_Stone_Area.geojson')
# if dc_boundary.crs != "EPSG:4326":
#     dc_boundary = dc_boundary.to_crs(epsg=4326)

# dc_geojson = dc_boundary.__geo_interface__

# fig = px.choropleth_mapbox(
#     geojson=dc_geojson,
#     locations=dc_boundary.index,
#     color_discrete_sequence=["lightgray"],
#     center={"lat": 38.9072, "lon": -77.0369},
#     mapbox_style="carto-positron",
#     zoom=10,
#     opacity=0.4
# )

# fig.add_scattermapbox(
#     lat=stations_plot['lat'],
#     lon=stations_plot['lng'],
#     mode='markers',
#     marker=dict(
#         size=stations_plot['total_long_trips'] / stations_plot['total_long_trips'].max() * 40 + 5,  
#         color='red',
#         opacity=0.7
#     ),
#     text=stations_plot['station_name'] + '<br>Total long trips: ' + stations_plot['total_long_trips'].astype(int).astype(str),
#     name='Stations with long trips'
# )

# fig.update_layout(
#     title="محطات الدراجات التي استقبلت أو أطلقت رحلات تجاوزت يوم واحد",
#     margin={"r":0,"t":30,"l":0,"b":0}
# )

# fig.show()



image_path = r'C:\Users\ASUS\OneDrive\Desktop\b4.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

يوجد محطات داخل وخارج واشنطن نقطتها كبيرة ولكن داخل الحدود تكون اكبر وسبب

الرحلات التي تتجاوز يوم غالبا تكون من خارج الحدود الى داخل المدينة يعني سفر

c. دراسة المالية

c.1

In [144]:
bins = [1,2 ,3,3.95,4, 5, 7, 10, 15, 30, 60, 120]
labels = [
    '1–2 $',
    '2–3 $',
    '3–3.95 $',
     '3.95–4 $',
    '4–5 $',
    '5–7 $',
    '7–10 $',
    '10–15 $',
    '15–30 $',
    '30–60 $',
    '60–120 $',

]

df['total_category'] = pd.cut(df['total_cost'], bins=bins, labels=labels, right=False)

print(df['total_category'].value_counts())
total_category
3.95–4 $    3011977
4–5 $       1147289
1–2 $        717707
2–3 $        677682
3–3.95 $     304753
5–7 $        129323
7–10 $        60219
10–15 $       29250
15–30 $       15738
60–120 $       6010
30–60 $        3794
Name: count, dtype: int64
In [ ]:
df_filtered = df[df['total_category'].notna()]
"""
fig = px.histogram(
    df_filtered,
    x='total_category',
    color_discrete_sequence=['lightblue'],
    nbins=len(df_filtered['total_category'].unique()),
    opacity=0.6,
    histnorm=None
)

box_fig = px.box(
    df_filtered,
    x='total_category',
    y='total_cost',
    points='outliers',
    color_discrete_sequence=['darkblue']
)

for trace in box_fig.data:
    fig.add_trace(trace)

fig.update_layout(
    title='توزيع تكلفة الرحلات حسب الفئات',
    xaxis_title='فئة التكلفة',
    yaxis_title='عدد الرحلات / تكلفة ($)',
    barmode='overlay',
    template='plotly_white',
    yaxis_type="log",        # مقياس لوغاريتمي
    xaxis_tickangle=-45,     # تدوير التسميات على المحور X
    bargap=0.2,              # مسافة بين الأعمدة
    width=1200               # عرض الرسم
)

fig.show()

"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\c1.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

الغالبية العظمى من الرحلات تقع ضمن نطاق سعري ضيق (1–5 $)، مع توزيع متماثل وقيم قليلة الشذوذ مما يشير إلى تسعير ثابت للمشتركين . ( يوجد التفاصيل بتقرير)

c.2

In [ ]:
"""
df_filtered = df[df['total_cost'].notna() & df['ride_duration_min'].notna()]

fig = px.scatter(
    df_filtered,
    x='ride_duration_min',
    y='total_cost',
    trendline='ols', 
    opacity=0.5,
    color_discrete_sequence=['darkblue']
)

fig.update_layout(
    title='مخطط تكلفة الرحلة مقابل المدة الزمنية مع خط الاتجاه',
    xaxis_title='المدة الزمنية (بالدقائق أو بالساعات حسب البيانات)',
    yaxis_title='تكلفة الرحلة ($)',
    template='plotly_white',
    width=1000,
    height=600
)

fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\c2.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

توجد علاقة طردية خطية تقريبًا بين المدة الزمنية وتكلفة الرحلة كلما زادت مدة الرحلة، زادت التكلفة

• خط الاتجاه (Trendline)

• يمثل الميل العام للتكلفة مع الوقت

• مما يؤكد سياسية التسعير الثابتة

• عدة خطوط فرعية واضحة( لكن تم رسم خط التريند بدون تحديد ولا سمة ) وتفسير

• المخطط الحالي لا يميز بين الأنواع مثل نوع الدراجة أو نوع المستخدم بالتالي، جميع البيانات تم التعامل معها كـ مجموعة واحدة وبهذا يرسم trendline شامل لكل النقاط

c.3

In [ ]:
"""
df['date'] = pd.to_datetime(df['started_at']).dt.date
weather_df['date'] = pd.to_datetime(weather_df['date']).dt.date
merged_df = pd.merge(df, weather_df[['date', 'temp']], on='date', how='left')

filtered_df = merged_df.dropna(subset=['total_cost', 'temp', 'rideable_type'])
"""

"""
fig = px.scatter(
    filtered_df,
    x='temp',
    y='total_cost',
    color='rideable_type', 
    opacity=0.6,
    title='تكلفة الرحلة مقابل درجة الحرارة بحسب فئة الدراجة',
    labels={'temp': 'درجة الحرارة (°C)', 'total_cost': 'تكلفة الرحلة ($)', 'rideable_type': 'فئة الدراجة'},
    template='plotly_white'
)

fig.update_traces(marker=dict(size=6))
fig.update_layout(width=1000, height=600)
fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\c3.png'

image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

توزيع درجات الحرارة واسع

• يمتد من أقل من -5°C إلى حوالي 32°C. يدل على أن البيانات تشمل جميع الفصول (شتاء إلى صيف)

لا علاقة واضحة بين درجة الحرارة وتكلفة الرحلة

• التشتت عشوائي, لا يظهر نمط متصاعد أو تنازلي مع تغيّر الحرارة

استنتاج

درجة الحرارة ليست عاملًا مؤثرًا في تسعير الرحلة

c.4

In [ ]:
df['start_date'] = pd.to_datetime(df['started_at'].dt.date)
df['start_week']=df['started_at'].dt.to_period('W').apply(lambda r : r.start_time)

daily_revenue = df.groupby('start_date')['total_cost'].sum().reset_index()
daily_revenue['type'] = 'Daily'
weekly_revenue = df.groupby('start_week')['total_cost'].sum().reset_index()
weekly_revenue = weekly_revenue.rename(columns={'start_week': 'start_date'}) 
weekly_revenue['type'] = 'Weekly'

combined_revenue = pd.concat([daily_revenue, weekly_revenue])

fig = px.line(combined_revenue, x='start_date', y='total_cost', color='type',
              title='الإيرادات اليومية والأسبوعية',
              labels={'start_date': 'التاريخ', 'total_cost': 'الإيرادات', 'type': 'نوع الإيراد'},
              markers=True)

fig.update_layout(template='plotly_dark')  
# fig.show()
image_path = r"C:\Users\ASUS\OneDrive\Desktop\images\1.png"

image = Image.open(image_path)
display(image)

شرح المخطط¶

الإيرادات اليومية :تتميز بتقلبات حادة

الإيرادات الأسبوعية: تظهر اتجاهات أوضح وأكثر سلاسة

الاتجاهات الرئيسية

نمو في منتصف العام: شهدت الإيرادات نمواً ملحوظاً من الربيع واستمرت في الارتفاع لتصل إلى ذروتها في سبتمبر/أكتوبر 2024، متجاوزة 600,000 وحدة نقدية أسبوعياً

انخفاض حاد نهاية العام: بدأت الإيرادات في الانحدار بشكل حاد من نوفمبر 2024، لتصل إلى مستويات منخفضة جداً بحلول يناير 2025

يشير هذا النمط الموسمي القوي إلى أن العمل يعتمد بشكل كبير على العوامل الموسمية والطقس، مما يتوافق تماماً مع طبيعة "رحلات الدراجات". حيث تكون الإيرادات مرتفعة جداً خلال المواسم الدافئة (الربيع والصيف والخريف)، وتنخفض بشكل كبير خلال الشتاء

c.5

In [ ]:
df['start_month'] = df['started_at'].dt.to_period('M').astype(str)
monthly_avg = df.groupby('start_month')['total_cost'].mean().reset_index()

# fig = px.line(monthly_avg, x='start_month', y='total_cost', markers=True,
#               title='متوسط الإيرادات الشهرية',
#               labels={'start_month': 'الشهر', 'total_cost': 'متوسط الإيرادات (دولار)'})

# fig.update_layout(template='plotly_white', xaxis_tickangle=-45)
# fig.show()
image_path = r"C:\Users\ASUS\OneDrive\Desktop\images\2.png"
image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

شرح: بدأ متوسط الإيرادات مرتفعاً في يناير ومارس 2024 (حوالي 3.88 دولار)

شهدت الأشهر اللاحقة تذبذباً مع اتجاه عام نحو الانخفاض، حيث وصل المتوسط إلى أدنى مستوياته في نوفمبر 2024 (حوالي 3.78 دولار)

يشير الانخفاض التدريجي بعد مارس إلى تحديات في الحفاظ على مستوى المتوسط المرتفع للإيرادات

d

d.1

In [ ]:
df['start_geometry'] = [Point(xy) for xy in zip(df['start_lng_x'], df['start_lat_x'])]
df['end_geometry'] = [Point(xy) for xy in zip(df['end_lng_x'], df['end_lat_x'])]

start_gdf = gpd.GeoDataFrame(df, geometry='start_geometry', crs='EPSG:4326')
end_gdf = gpd.GeoDataFrame(df, geometry='end_geometry', crs='EPSG:4326')
parking_zones_gdf = parking_zones_gdf.to_crs(epsg=4326)
In [ ]:
parking_zones_gdf = parking_zones_gdf.to_crs(epsg=4326)
In [ ]:
start_join = gpd.sjoin(
    start_gdf[['ride_id', 'start_geometry']],
    parking_zones_gdf[['NAME', 'geometry']],
    how='left',
    predicate='within'
).rename(columns={'NAME': 'start_zone'})

end_join = gpd.sjoin(
    end_gdf[['ride_id', 'end_geometry']],
    parking_zones_gdf[['NAME', 'geometry']],
    how='left',
    predicate='within'
).rename(columns={'NAME': 'end_zone'})
In [ ]:
zone_counts = pd.concat([
    start_join['start_zone'],
    end_join['end_zone']
]).value_counts().reset_index()

zone_counts.columns = ['ZONE_NAME', 'trip_count']
In [ ]:
parking_zones_gdf = parking_zones_gdf.merge(zone_counts, left_on='NAME', right_on='ZONE_NAME', how='left')
parking_zones_gdf.drop(columns=['ZONE_NAME'], inplace=True)
parking_zones_gdf['trip_count'] = parking_zones_gdf['trip_count'].fillna(0)
In [ ]:
outside_trips_start = start_join['start_zone'].isna().sum()
outside_trips_end = end_join['end_zone'].isna().sum()

total_outside = outside_trips_start + outside_trips_end

print(f"عدد الرحلات خارج المناطق السكنية: {total_outside}")
عدد الرحلات خارج المناطق السكنية: 1649311
In [ ]:
"""
fig = px.choropleth_mapbox(
    parking_zones_gdf,
    geojson=parking_zones_gdf.geometry.__geo_interface__,
    locations=parking_zones_gdf.index,
    color='trip_count',
    mapbox_style="carto-positron",
    center={"lat": parking_zones_gdf.geometry.centroid.y.mean(), "lon": parking_zones_gdf.geometry.centroid.x.mean()},
    zoom=10,
    color_continuous_scale="YlOrRd",
    title="Geographic Heatmap of Trips per Residential Zone"
)
fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\d1.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

كل مضلع يمثل منطقة سكنية مختلفة و ألوان الخريطة تعبر عن عدد الرحلات اللون الاحمر الداكن يعني مناطق فيها اكبر عدد من الرحلات

للون الاصفر الفاتح يعني مناطق فيها عدد قليل من الرحلات ,

حسب المخطط منستنتج انو معظم الرحلات تتركز بشكل واضح في المناطق المركزية و كلما ابتعدنا عن مركز المدينة منلاحظ انو كثافة الرحلات بتقل بشكل تدريجي

d.2

In [ ]:
"""

usage_sum = mean_daily.groupby('category')['mean_daily_usage'].sum().reset_index()
usage_sum['category'] = pd.Categorical(usage_sum['category'], categories=['رمادي', 'أصفر', 'أحمر'], ordered=True)

fig = px.bar(
    usage_sum,
    x='category',
    y='mean_daily_usage',
    color='category',
    color_discrete_map={'رمادي':'gray', 'أصفر':'yellow', 'أحمر':'red'},
    title='مجموع متوسط الرحلات اليومية لكل فئة من القطاعات الجغرافية',
    labels={'category':'الفئة', 'mean_daily_usage':'مجموع متوسط الرحلات اليومية'}
)
fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\d2.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

• الفئة الرمادية تمثل أعلى مجموع متوسط رحلات يومية بشكل كبير مقارنة بالفئتين الأخريين، حيث يصل المجموع إلى حوالي 22,000 رحلة يوميًا

• الفئة الصفراء تأتي في المرتبة الثانية بحوالي 3,800 رحلة يوميًا

• الفئة الحمراء هي الأقل بحوالي 1,000 رحلة يوميً

d.3

In [ ]:
distance_columns = [
    'distance_to_business_area',
    'start_distance_to_shuttle_m',
    'start_distance_to_metro_m'
]

df_melted = df[distance_columns].melt(
    var_name='distance_type',
    value_name='distance_value'
).dropna()

# custom_colors = {
#     'distance_to_business_area': 'black',
#     'start_distance_to_shuttle_m': 'green',
#     'start_distance_to_metro_m': 'red'
# }

# fig = px.histogram(
#     df_melted,
#     x='distance_value',
#     color='distance_type',
#     nbins=50,
#     barmode='overlay',
#     title='Histogram لتوزيع المسافات المختلفة',
#     color_discrete_map=custom_colors
# )

# fig.update_layout(
#     xaxis_title='المسافة (متر)',
#     yaxis_title='عدد التكرارات',
#     legend_title_text='نوع المسافة',
#     bargap=0.2,
#     template='plotly_white'  
# )

# fig.show()

image_path = r"C:\Users\ASUS\OneDrive\Desktop\images\3.4.png"
image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

تتركز ملايين التكرارات (كما هو موضح بالارتفاع الهائل للأعمدة عند المسافات الدنيا)

"اقل من 1000 متر"

هذا يشير إلى أن محطات الدراجات تقع في الغالب على مقربة شديدة من المناطق التجارية الرئيسية، ومحطات المترو، ومواقف الحافلات/المكوك

عدد التكرارات ينخفض بشكل كبير جداً كلما زادت المسافة، مما يدل على أن عدد قليل من النقاط يقع على مسافات بعيدة

الخلاصة

يؤكد هذا التوزيع مدى سهولة الوصول وقرب معظم النقاط من المناطق التجارية الرئيسية ومراكز النشاط

d.4

In [ ]:
df.drop(columns=['start_inside_business', 'end_inside_business'], errors='ignore', inplace=True)
df_station_unique = df_station.drop_duplicates(subset='name')

station_business = df_station_unique.set_index('name')['inside_Business']
df['start_inside_business'] = df['start_station_name'].map(station_business)
df['end_inside_business'] = df['end_station_name'].map(station_business)
df['business_trip_type'] = np.select(
    [
        df['start_inside_business'].isna() | df['end_inside_business'].isna(),
        df['start_inside_business'] & df['end_inside_business']
    ],
    [
        'unkhnown',
        'in'
    ],
    default='out'
)
In [ ]:
"""

vc = df['business_trip_type'].value_counts().reset_index()
vc.columns = ['business_trip_type', 'count']

fig = px.bar(
    vc,
    x='business_trip_type',
    y='count',
    labels={'business_trip_type': 'نوع الرحلة', 'count': 'عدد الرحلات'},
    title='توزيع الرحلات حسب المنطقة التجارية',
    template='plotly_white',
    color='business_trip_type',
    color_discrete_sequence=['lightgreen', 'salmon', 'gray']
)

fig.update_layout(showlegend=False)
fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\d4.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

داخل المنطقة التجارية فقط

• الرحلات تبدأ وتنتهي داخل نفس المنطقة

• عددها: أقل من 500,000 رحلة

• تمثل تنقلات محلية وقصيرة

• ملاحظة: منطقي أن تكون قليلة نظرًا لصغر مساحة المنطقة (~6.7 كم²)

من داخل المنطقة إلى خارجها

• تبدأ داخل المنطقة التجارية وتنتهي خارجها

• عددها: أكثر من 4 ملايين رحلة

• تمثل الاستخدام الأكثر شيوعًا: الناس تغادر وسط المدينة إلى مناطق سكنية أو ضواحي

وسط المدينة نقطة انطلاق أساسية للرحلات او من داخل منطقة التجارية لخارجها

موقع غير معروف

• الرحلات التي تفتقد معلومات البداية أو النهاية

• عددها: حوالي 2 مليون رحلة

d.5

In [ ]:
df_inside = df[df['business_trip_type'] == 'in']
counts = df_inside.groupby(['rideable_type', 'member_casual']).size().reset_index(name='count')
"""
fig = px.bar(
    counts,
    x='rideable_type',
    y='count',
    color='member_casual',
    barmode='group',
    title='توزيع الرحلات داخل المنطقة التجارية حسب نوع الدراجة ونوع الاشتراك',
    labels={
        'rideable_type': 'نوع الدراجة',
        'member_casual': 'نوع الاشتراك',
        'count': 'عدد الرحلات'
    },
    template='plotly_white',
    color_discrete_sequence=px.colors.qualitative.Set2
)

fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\d5.png'
image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

يتضح أن المستخدمين داخل المنطقة التجارية (Downtown D.C.) يفضلون استخدام الدراجات العادية (classic_bike) على الدراجات الكهربائية (electric_bike)

أسباب منطقية لهذا التفضيل

  1. طبيعة المنطقة الجغرافية

o Downtown D.C. منطقة صغيرة المساحة

o تعاني من ازدحام مروري وكثافة مشاة

o تضم مكاتب حكومية، أسواق، وسياحة – ما يجعل الحركة محدودة وضيقة

  1. خصائص الدراجة العادية

o أقل سرعة → مناسبة للحذر بين المشاة

o أسهل في المناورة داخل الطرق الضيقة أو المزدحمة

o أكثر أمانًا في البيئات الحضرية الكثيفة مقارنة بالكهربائية

  1. خصائص الدراجة الكهربائية

o عادةً تُستخدم في رحلات أطول أو عبر مناطق أقل ازدحامًا

o سرعتها العالية تجعلها أقل ملاءمة للمناطق الداخلية الضيقة

d.6

In [ ]:
bins = [0, 0.005, 0.015, 0.03, df['distance_to_business_area_x'].max() + 0.01]
labels = ['Very Close', 'Close', 'Medium', 'Far']
df['distance_bin'] = pd.cut(df['distance_to_business_area_x'], bins=bins, labels=labels, include_lowest=True)
contingency_table = pd.crosstab(df['distance_bin'], df['member_casual'])
print(contingency_table)
chi2, p, dof, expected = chi2_contingency(contingency_table)
print("Chi-square value:", chi2)
print("P-value:", p)

if p < 0.05:
    print("يوجد ارتباط احصائي بين المسافة ونوع الاشتراك")
else:
    print("لا يوجد ارتباط احصائي بين المسافة ونوع الاشتراك")
member_casual  casual   member
distance_bin                  
Very Close     576480  1025146
Close          463104   916950
Medium         276155   565130
Far            304386   443677
Chi-square value: 13877.358622143238
P-value: 0.0
يوجد ارتباط احصائي بين المسافة ونوع الاشتراك

e

e.1

In [165]:
weather_melted = weather_df.melt(
    id_vars='datetime',
    value_vars=['temp', 'humidity', 'windspeed'],
    var_name='Variable',
    value_name='Value'
)
In [175]:
"""
fig = px.line(
    weather_melted,
    x='datetime',
    y='Value',
    color='Variable',
    title='Daily Averages: Temperature, Humidity, and Wind Speed'
)

fig.update_layout(template='plotly_white')
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\e1.png'

image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

(temp) درجة الحرارة

بترتفع في فصل الصيف بين مايو وسبتمبر تقريبا وبتنخفض في الشتاء بين ديسمبر وفبراير

(humidity) الرطوبة

لا يوجد نمط موسمي قوي

(windspeed) سرعة الرياح

تظهر بعض القيم العالية المفاجئة (قد تكون بسبب عواصف أو أيام رياح قوية)، لكن بشكل عام ثابتة نسبياً مقارنة بالحرارة

e.2

In [ ]:
df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
df['date'] = df['ended_at'].dt.date
daily_revenue_df = df.groupby('date')['total_cost'].sum().reset_index()
daily_revenue_df.rename(columns={'total_cost': 'total_revenue'}, inplace=True)

weather_df['date'] = weather_df['datetime'].dt.date
daily_revenue_df = daily_revenue_df.merge(
    weather_df[['date', 'weather_bin']],
    on='date',
    how='left'
)
In [177]:
"""
fig = px.box(
    daily_revenue_df,
    x='weather_bin',
    y='total_revenue',
    color='weather_bin',
    title='Daily Revenue by Weather Condition',
    labels={'weather_bin': 'Weather Condition', 'total_revenue': 'Total Revenue'},
    template='plotly_white'
)
fig.show()
"""
image_path = r'C:\Users\ASUS\OneDrive\Desktop\e2.png'

image = Image.open(image_path)
display(image)
No description has been provided for this image

شرح المخطط¶

Sunny¶

k70 يظهر استقرار جيد لكن مع وجود بعض القيم المنخفضة والقيم الأعلى تصل (حوالي 50K - 55K) اعلى الإيرادات بشكل عام، والوسيط مرتفع

Rainy¶

k40 الايرادات أيضا جيدة لكن اقل من المشمس بقليل. الوسيط تقريبا حول

Cloudy¶

اقل الإيرادات بشكل واضح والتوزيع بالكامل منخفض مقارنة بباقي الحالات

e.3

In [ ]:
weather_df['date'] = weather_df['datetime'].dt.date
merged_df = daily_revenue_df.merge(
    weather_df[['date', 'temp', 'humidity']],
    on='date',
    how='left'
)
correlation_temp = merged_df['total_revenue'].corr(merged_df['temp'])
correlation_humidity = merged_df['total_revenue'].corr(merged_df['humidity'])

print("معامل الارتباط بين الإيرادات ودرجة الحرارة:", correlation_temp)
print("معامل الارتباط بين الإيرادات والرطوبة:", correlation_humidity)
معامل الارتباط بين الإيرادات ودرجة الحرارة: 0.6889976439843918
معامل الارتباط بين الإيرادات والرطوبة: -0.15588133323001235

النتيجة:

العلاقة بين الايرادات و درجة الحرارة علاقة طردية قوية يعني كلما ارتفعت درجة الحرارة زادت الايرادات اليومية

العلاقة بين الايرادات و الرطوبة علاقة عكسية ضعيفة يعني كلما زادت الرطوبة انخفضت الايرادات بشكل طفيف

e.4

شرح:

In [ ]:
df['date'] = pd.to_datetime(df['ended_at']).dt.date
weather_df['date'] = pd.to_datetime(weather_df['datetime']).dt.date
merged_df = df.merge(weather_df[['date', 'weather_bin']], on='date', how='left')
In [169]:
contingency_table = pd.crosstab(merged_df['weather_bin'], merged_df['rideable_type'])
print(contingency_table)

chi2, p, dof, expected = chi2_contingency(contingency_table)

print("Chi-square value:", chi2)
print("P-value:", p)

if p < 0.05:
    print("يوجد ارتباط إحصائي بين حالة الطقس ونوع الدراجة")
else:
    print("لا يوجد ارتباط إحصائي بين حالة الطقس ونوع الدراجة")
rideable_type  classic_bike   electric_bike
weather_bin                                
Cloudy                 19977          31307
Rainy                 909763        1142249
Sunny                1777651        2222795
Chi-square value: 617.2363368229555
P-value: 9.307487117997466e-135
يوجد ارتباط إحصائي بين حالة الطقس ونوع الدراجة

3 . اصطياد الانماط

a. 1

In [170]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6103742 entries, 0 to 6103741
Data columns (total 64 columns):
 #   Column                       Dtype         
---  ------                       -----         
 0   ride_id                      string        
 1   rideable_type                string        
 2   started_at                   datetime64[ns]
 3   ended_at                     datetime64[ns]
 4   start_station_name           object        
 5   start_station_id             Int64         
 6   end_station_name             object        
 7   end_station_id               Int64         
 8   start_lat_x                  float64       
 9   start_lng_x                  float64       
 10  end_lat_x                    float64       
 11  end_lng_x                    float64       
 12  member_casual                string        
 13  start_lat_y                  float64       
 14  start_lng_y                  float64       
 15  end_lat_y                    float64       
 16  end_lng_y                    float64       
 17  start_year                   int32         
 18  end_year                     int32         
 19  start_month                  int32         
 20  end_month                    int32         
 21  start_day                    int32         
 22  end_day                      int32         
 23  start_day_name               object        
 24  end_day_name                 object        
 25  start_hour_am_pm             object        
 26  end_hour_am_pm               object        
 27  ride_duration_min            float64       
 28  trip_in_business_area        bool          
 29  base_cost                    float64       
 30  extra_time_fee               float64       
 31  long_trip_fee                float64       
 32  business_area_fee            float64       
 33  total_cost                   float64       
 34  start_distance_to_shuttle_m  float64       
 35  end_distance_to_shuttle_m    float64       
 36  start_nearest_shuttle_id     float64       
 37  end_nearest_shuttle_id       float64       
 38  start_distance_to_metro_m    float64       
 39  end_distance_to_metro_m      float64       
 40  start_nearest_metro_id       float64       
 41  end_nearest_metro_id         float64       
 42  distance_to_business_area_x  float64       
 43  close_to_business_area_x     object        
 44  distance_to_business_area_y  float64       
 45  close_to_business_area_y     object        
 46  distance_to_business_area    float64       
 47  close_to_business_area       float64       
 48  start_geohash                object        
 49  end_geohash                  object        
 50  start_date                   datetime64[ns]
 51  end_date                     object        
 52  start_capacity_bin           object        
 53  end_capacity_bin             object        
 54  duration_category            category      
 55  total_category               category      
 56  start_week                   datetime64[ns]
 57  start_geometry               object        
 58  end_geometry                 object        
 59  start_inside_business        object        
 60  end_inside_business          object        
 61  business_trip_type           object        
 62  distance_bin                 category      
 63  date                         object        
dtypes: Int64(2), bool(1), category(3), datetime64[ns](4), float64(26), int32(6), object(19), string(3)
memory usage: 2.6+ GB
In [171]:
weather_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 366 entries, 0 to 365
Data columns (total 17 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   name           366 non-null    object        
 1   datetime       366 non-null    datetime64[ns]
 2   tempmax        366 non-null    float64       
 3   tempmin        366 non-null    float64       
 4   temp           366 non-null    float64       
 5   humidity       366 non-null    float64       
 6   windspeed      366 non-null    float64       
 7   windspeedmax   366 non-null    float64       
 8   windspeedmean  366 non-null    float64       
 9   windspeedmin   366 non-null    float64       
 10  cloudcover     366 non-null    float64       
 11  sunrise        366 non-null    datetime64[ns]
 12  sunset         366 non-null    datetime64[ns]
 13  conditions     366 non-null    object        
 14  temp_range     366 non-null    category      
 15  weather_bin    366 non-null    object        
 16  date           366 non-null    object        
dtypes: category(1), datetime64[ns](3), float64(9), object(4)
memory usage: 46.6+ KB
In [172]:
daily_revenue = daily_revenue.sort_values('start_date')

ايرادات مستقبلية اضافية اي بعد 2024

In [173]:
last_10_days = daily_revenue['total_cost'].tail(10).reset_index(drop=True)

daily_diffs = last_10_days.diff().dropna()
avg_daily_change = daily_diffs.mean()

last_value = last_10_days.iloc[-1]

baseline_values = [last_value + avg_daily_change * (i + 1) for i in range(10)]

last_date = daily_revenue['start_date'].max()
future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=10)

baseline_forecast_linear = pd.DataFrame({
    'start_date': future_dates,
    'baseline_total_cost': baseline_values
})
baseline_forecast_linear
Out[173]:
start_date baseline_total_cost
0 2025-01-01 46992.011111
1 2025-01-02 48900.342222
2 2025-01-03 50808.673333
3 2025-01-04 52717.004444
4 2025-01-05 54625.335556
5 2025-01-06 56533.666667
6 2025-01-07 58441.997778
7 2025-01-08 60350.328889
8 2025-01-09 62258.660000
9 2025-01-10 64166.991111
In [ ]:
import plotly.express as px
import pandas as pd

daily_revenue_renamed = daily_revenue.rename(columns={'total_cost': 'baseline_total_cost'})
daily_revenue_renamed['type'] = 'actual'
baseline_forecast_linear['type'] = 'linear_baseline'
full_df = pd.concat([
    daily_revenue_renamed[['start_date', 'baseline_total_cost', 'type']],
    baseline_forecast_linear
    
], ignore_index=True)

# fig = px.line(
#     full_df,
#     x='start_date',
#     y='baseline_total_cost',
#     color='type',
#     title='Actual vs. Linear Baseline',
#     markers=True
# )

# fig.update_layout(
#     xaxis_title='Date',
#     yaxis_title='Total Cost',
#     legend_title='Data Type',
#     hovermode='x unified'
# )

# fig.show()
image_path = r"C:\Users\ASUS\OneDrive\Desktop\images\10.png"
image = Image.open(image_path)
display(image)
No description has been provided for this image
In [174]:
df_prophet = daily_revenue[['start_date', 'total_cost']].rename(
    columns={'start_date': 'ds', 'total_cost': 'y'}
)
In [175]:
from prophet import Prophet

model = Prophet()
model.fit(df_prophet)
#seasonality_mode='multiplicative'
01:34:14 - cmdstanpy - INFO - Chain [1] start processing
01:34:14 - cmdstanpy - INFO - Chain [1] done processing
Out[175]:
<prophet.forecaster.Prophet at 0x24530362310>
In [176]:
future = model.make_future_dataframe(periods=10)  
In [ ]:
forecast = model.predict(future)
In [191]:
from prophet.plot import plot_plotly
import plotly.graph_objects as go

fig = plot_plotly(model, forecast)
# fig.update_layout(title='Prophet Forecast for Total Cost')
# fig.show()
image_path = r"C:\Users\ASUS\OneDrive\Desktop\images\Screenshot 2025-06-20 193703.png"

image = Image.open(image_path)
display(image)
No description has been provided for this image

......Tuning.......

.اختيار نوع الموسمية1

  1. تعديل مرونة الاتجاه
  1. عدد نقاط التغير في الاتجاه

4.إضافة seasonalities مخصصة

In [ ]:
results = []
train = df_prophet.iloc[:-10]  
test = df_prophet.iloc[-10:]
seasonality_modes = ['additive', 'multiplicative']

changepoint_values=[ 0.001,0.1, 0.3, 0.5,0.8,0.9]
n_changepoints=[50,60,70,80,90]
fouriers=[5,10,15,20]
for cp in tqdm(changepoint_values):
    for mode in seasonality_modes:
        for n in n_changepoints:
            for f in fouriers:
                model = Prophet(weekly_seasonality=False ,changepoint_prior_scale=cp, seasonality_mode=mode, n_changepoints= n)
                model.add_seasonality(name='weekly', period=7, fourier_order=f)
                model.fit(train)
                
                train_pred = model.predict(train[['ds']])
                rmse_train = np.sqrt(mean_squared_error(train['y'], train_pred['yhat']))
                relative_error_train=(rmse_train / train['y'].mean()) * 100
                future = model.make_future_dataframe(periods=10)
                forecast = model.predict(future)
                y_pred_test = forecast[['ds', 'yhat']].set_index('ds').loc[test['ds']]
                rmse_test = np.sqrt(mean_squared_error(test['y'], y_pred_test['yhat']))
                
                
                relative_error_test = (rmse_test / test['y'].mean()) * 100
                
                results.append({
                    'changepoint_prior_scale': cp,
                    'seasonality_mode': mode,
                    'n_changepoints':n,
                    'forier_id':f,
                    'relative_error_train%':relative_error_train,
                    'relative_rmse_test_%': relative_error_test
                })

df_results = pd.DataFrame(results).sort_values(by='relative_rmse_test_%')
print(df_results)
  0%|          | 0/6 [00:00<?, ?it/s]01:34:31 - cmdstanpy - INFO - Chain [1] start processing
01:34:31 - cmdstanpy - INFO - Chain [1] done processing
01:34:31 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:31 - cmdstanpy - INFO - Chain [1] start processing
01:34:31 - cmdstanpy - INFO - Chain [1] done processing
01:34:32 - cmdstanpy - INFO - Chain [1] start processing
01:34:32 - cmdstanpy - INFO - Chain [1] done processing
01:34:32 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:32 - cmdstanpy - INFO - Chain [1] start processing
01:34:32 - cmdstanpy - INFO - Chain [1] done processing
01:34:33 - cmdstanpy - INFO - Chain [1] start processing
01:34:33 - cmdstanpy - INFO - Chain [1] done processing
01:34:33 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:33 - cmdstanpy - INFO - Chain [1] start processing
01:34:33 - cmdstanpy - INFO - Chain [1] done processing
01:34:34 - cmdstanpy - INFO - Chain [1] start processing
01:34:34 - cmdstanpy - INFO - Chain [1] done processing
01:34:34 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:34 - cmdstanpy - INFO - Chain [1] start processing
01:34:35 - cmdstanpy - INFO - Chain [1] done processing
01:34:35 - cmdstanpy - INFO - Chain [1] start processing
01:34:35 - cmdstanpy - INFO - Chain [1] done processing
01:34:35 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:35 - cmdstanpy - INFO - Chain [1] start processing
01:34:35 - cmdstanpy - INFO - Chain [1] done processing
01:34:36 - cmdstanpy - INFO - Chain [1] start processing
01:34:36 - cmdstanpy - INFO - Chain [1] done processing
01:34:36 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:36 - cmdstanpy - INFO - Chain [1] start processing
01:34:37 - cmdstanpy - INFO - Chain [1] done processing
01:34:37 - cmdstanpy - INFO - Chain [1] start processing
01:34:37 - cmdstanpy - INFO - Chain [1] done processing
01:34:37 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:37 - cmdstanpy - INFO - Chain [1] start processing
01:34:38 - cmdstanpy - INFO - Chain [1] done processing
01:34:38 - cmdstanpy - INFO - Chain [1] start processing
01:34:38 - cmdstanpy - INFO - Chain [1] done processing
01:34:38 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:38 - cmdstanpy - INFO - Chain [1] start processing
01:34:39 - cmdstanpy - INFO - Chain [1] done processing
01:34:39 - cmdstanpy - INFO - Chain [1] start processing
01:34:39 - cmdstanpy - INFO - Chain [1] done processing
01:34:39 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:39 - cmdstanpy - INFO - Chain [1] start processing
01:34:40 - cmdstanpy - INFO - Chain [1] done processing
01:34:40 - cmdstanpy - INFO - Chain [1] start processing
01:34:40 - cmdstanpy - INFO - Chain [1] done processing
01:34:40 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:40 - cmdstanpy - INFO - Chain [1] start processing
01:34:41 - cmdstanpy - INFO - Chain [1] done processing
01:34:41 - cmdstanpy - INFO - Chain [1] start processing
01:34:41 - cmdstanpy - INFO - Chain [1] done processing
01:34:41 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:41 - cmdstanpy - INFO - Chain [1] start processing
01:34:42 - cmdstanpy - INFO - Chain [1] done processing
01:34:42 - cmdstanpy - INFO - Chain [1] start processing
01:34:42 - cmdstanpy - INFO - Chain [1] done processing
01:34:42 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:42 - cmdstanpy - INFO - Chain [1] start processing
01:34:44 - cmdstanpy - INFO - Chain [1] done processing
01:34:44 - cmdstanpy - INFO - Chain [1] start processing
01:34:44 - cmdstanpy - INFO - Chain [1] done processing
01:34:44 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:44 - cmdstanpy - INFO - Chain [1] start processing
01:34:45 - cmdstanpy - INFO - Chain [1] done processing
01:34:45 - cmdstanpy - INFO - Chain [1] start processing
01:34:45 - cmdstanpy - INFO - Chain [1] done processing
01:34:45 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:45 - cmdstanpy - INFO - Chain [1] start processing
01:34:47 - cmdstanpy - INFO - Chain [1] done processing
01:34:47 - cmdstanpy - INFO - Chain [1] start processing
01:34:47 - cmdstanpy - INFO - Chain [1] done processing
01:34:47 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:47 - cmdstanpy - INFO - Chain [1] start processing
01:34:48 - cmdstanpy - INFO - Chain [1] done processing
01:34:48 - cmdstanpy - INFO - Chain [1] start processing
01:34:48 - cmdstanpy - INFO - Chain [1] done processing
01:34:48 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:48 - cmdstanpy - INFO - Chain [1] start processing
01:34:50 - cmdstanpy - INFO - Chain [1] done processing
01:34:50 - cmdstanpy - INFO - Chain [1] start processing
01:34:50 - cmdstanpy - INFO - Chain [1] done processing
01:34:50 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:50 - cmdstanpy - INFO - Chain [1] start processing
01:34:51 - cmdstanpy - INFO - Chain [1] done processing
01:34:51 - cmdstanpy - INFO - Chain [1] start processing
01:34:51 - cmdstanpy - INFO - Chain [1] done processing
01:34:51 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:51 - cmdstanpy - INFO - Chain [1] start processing
01:34:53 - cmdstanpy - INFO - Chain [1] done processing
01:34:53 - cmdstanpy - INFO - Chain [1] start processing
01:34:53 - cmdstanpy - INFO - Chain [1] done processing
01:34:53 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:53 - cmdstanpy - INFO - Chain [1] start processing
01:34:54 - cmdstanpy - INFO - Chain [1] done processing
01:34:55 - cmdstanpy - INFO - Chain [1] start processing
01:34:55 - cmdstanpy - INFO - Chain [1] done processing
01:34:55 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:55 - cmdstanpy - INFO - Chain [1] start processing
01:34:56 - cmdstanpy - INFO - Chain [1] done processing
01:34:56 - cmdstanpy - INFO - Chain [1] start processing
01:34:56 - cmdstanpy - INFO - Chain [1] done processing
01:34:56 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:56 - cmdstanpy - INFO - Chain [1] start processing
01:34:57 - cmdstanpy - INFO - Chain [1] done processing
01:34:57 - cmdstanpy - INFO - Chain [1] start processing
01:34:57 - cmdstanpy - INFO - Chain [1] done processing
01:34:57 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:57 - cmdstanpy - INFO - Chain [1] start processing
01:34:57 - cmdstanpy - INFO - Chain [1] done processing
01:34:58 - cmdstanpy - INFO - Chain [1] start processing
01:34:58 - cmdstanpy - INFO - Chain [1] done processing
01:34:58 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:58 - cmdstanpy - INFO - Chain [1] start processing
01:34:58 - cmdstanpy - INFO - Chain [1] done processing
01:34:59 - cmdstanpy - INFO - Chain [1] start processing
01:34:59 - cmdstanpy - INFO - Chain [1] done processing
01:34:59 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:34:59 - cmdstanpy - INFO - Chain [1] start processing
01:35:00 - cmdstanpy - INFO - Chain [1] done processing
01:35:00 - cmdstanpy - INFO - Chain [1] start processing
01:35:00 - cmdstanpy - INFO - Chain [1] done processing
01:35:00 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:35:00 - cmdstanpy - INFO - Chain [1] start processing
01:35:00 - cmdstanpy - INFO - Chain [1] done processing
01:35:01 - cmdstanpy - INFO - Chain [1] start processing
01:35:01 - cmdstanpy - INFO - Chain [1] done processing
01:35:01 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:35:01 - cmdstanpy - INFO - Chain [1] start processing
01:35:01 - cmdstanpy - INFO - Chain [1] done processing
01:35:02 - cmdstanpy - INFO - Chain [1] start processing
01:35:02 - cmdstanpy - INFO - Chain [1] done processing
01:35:02 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:35:02 - cmdstanpy - INFO - Chain [1] start processing
01:35:03 - cmdstanpy - INFO - Chain [1] done processing
01:35:03 - cmdstanpy - INFO - Chain [1] start processing
01:35:03 - cmdstanpy - INFO - Chain [1] done processing
01:35:03 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:35:03 - cmdstanpy - INFO - Chain [1] start processing
01:35:04 - cmdstanpy - INFO - Chain [1] done processing
01:35:04 - cmdstanpy - INFO - Chain [1] start processing
01:35:05 - cmdstanpy - INFO - Chain [1] done processing
01:35:05 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:35:05 - cmdstanpy - INFO - Chain [1] start processing
01:35:05 - cmdstanpy - INFO - Chain [1] done processing
01:35:06 - cmdstanpy - INFO - Chain [1] start processing
01:35:06 - cmdstanpy - INFO - Chain [1] done processing
01:35:06 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:35:06 - cmdstanpy - INFO - Chain [1] start processing
01:35:06 - cmdstanpy - INFO - Chain [1] done processing
01:35:07 - cmdstanpy - INFO - Chain [1] start processing
01:35:07 - cmdstanpy - INFO - Chain [1] done processing
01:35:07 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:35:07 - cmdstanpy - INFO - Chain [1] start processing
01:35:08 - cmdstanpy - INFO - Chain [1] done processing
01:35:08 - cmdstanpy - INFO - Chain [1] start processing
01:35:08 - cmdstanpy - INFO - Chain [1] done processing
01:35:08 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:35:08 - cmdstanpy - INFO - Chain [1] start processing
01:35:09 - cmdstanpy - INFO - Chain [1] done processing
01:35:10 - cmdstanpy - INFO - Chain [1] start processing
01:35:10 - cmdstanpy - INFO - Chain [1] done processing
01:35:10 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:35:10 - cmdstanpy - INFO - Chain [1] start processing
01:35:11 - cmdstanpy - INFO - Chain [1] done processing
01:35:11 - cmdstanpy - INFO - Chain [1] start processing
01:35:11 - cmdstanpy - INFO - Chain [1] done processing
01:35:11 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:35:11 - cmdstanpy - INFO - Chain [1] start processing
01:35:12 - cmdstanpy - INFO - Chain [1] done processing
01:35:12 - cmdstanpy - INFO - Chain [1] start processing
01:35:12 - cmdstanpy - INFO - Chain [1] done processing
01:35:12 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:35:12 - cmdstanpy - INFO - Chain [1] start processing
01:35:13 - cmdstanpy - INFO - Chain [1] done processing
01:35:13 - cmdstanpy - INFO - Chain [1] start processing
01:35:13 - cmdstanpy - INFO - Chain [1] done processing
01:35:13 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:35:13 - cmdstanpy - INFO - Chain [1] start processing
01:35:15 - cmdstanpy - INFO - Chain [1] done processing
01:35:15 - cmdstanpy - INFO - Chain [1] start processing
01:35:15 - cmdstanpy - INFO - Chain [1] done processing
01:35:15 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:35:15 - cmdstanpy - INFO - Chain [1] start processing
01:35:16 - cmdstanpy - INFO - Chain [1] done processing
01:35:17 - cmdstanpy - INFO - Chain [1] start processing
01:35:17 - cmdstanpy - INFO - Chain [1] done processing
01:35:17 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:35:17 - cmdstanpy - INFO - Chain [1] start processing
01:35:18 - cmdstanpy - INFO - Chain [1] done processing
01:35:19 - cmdstanpy - INFO - Chain [1] start processing
01:35:19 - cmdstanpy - INFO - Chain [1] done processing
01:35:19 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:35:19 - cmdstanpy - INFO - Chain [1] start processing
01:35:20 - cmdstanpy - INFO - Chain [1] done processing
01:35:20 - cmdstanpy - INFO - Chain [1] start processing
01:35:20 - cmdstanpy - INFO - Chain [1] done processing
01:35:20 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:35:20 - cmdstanpy - INFO - Chain [1] start processing
01:35:23 - cmdstanpy - INFO - Chain [1] done processing
 17%|█▋        | 1/6 [00:52<04:20, 52.07s/it]01:35:23 - cmdstanpy - INFO - Chain [1] start processing
01:35:23 - cmdstanpy - INFO - Chain [1] done processing
01:35:23 - cmdstanpy - INFO - Chain [1] start processing
01:35:23 - cmdstanpy - INFO - Chain [1] done processing
01:35:24 - cmdstanpy - INFO - Chain [1] start processing
01:35:24 - cmdstanpy - INFO - Chain [1] done processing
01:35:24 - cmdstanpy - INFO - Chain [1] start processing
01:35:24 - cmdstanpy - INFO - Chain [1] done processing
01:35:24 - cmdstanpy - INFO - Chain [1] start processing
01:35:24 - cmdstanpy - INFO - Chain [1] done processing
01:35:24 - cmdstanpy - INFO - Chain [1] start processing
01:35:24 - cmdstanpy - INFO - Chain [1] done processing
01:35:25 - cmdstanpy - INFO - Chain [1] start processing
01:35:25 - cmdstanpy - INFO - Chain [1] done processing
01:35:25 - cmdstanpy - INFO - Chain [1] start processing
01:35:25 - cmdstanpy - INFO - Chain [1] done processing
01:35:25 - cmdstanpy - INFO - Chain [1] start processing
01:35:25 - cmdstanpy - INFO - Chain [1] done processing
01:35:25 - cmdstanpy - INFO - Chain [1] start processing
01:35:25 - cmdstanpy - INFO - Chain [1] done processing
01:35:26 - cmdstanpy - INFO - Chain [1] start processing
01:35:26 - cmdstanpy - INFO - Chain [1] done processing
01:35:26 - cmdstanpy - INFO - Chain [1] start processing
01:35:26 - cmdstanpy - INFO - Chain [1] done processing
01:35:26 - cmdstanpy - INFO - Chain [1] start processing
01:35:26 - cmdstanpy - INFO - Chain [1] done processing
01:35:27 - cmdstanpy - INFO - Chain [1] start processing
01:35:27 - cmdstanpy - INFO - Chain [1] done processing
01:35:27 - cmdstanpy - INFO - Chain [1] start processing
01:35:27 - cmdstanpy - INFO - Chain [1] done processing
01:35:27 - cmdstanpy - INFO - Chain [1] start processing
01:35:27 - cmdstanpy - INFO - Chain [1] done processing
01:35:27 - cmdstanpy - INFO - Chain [1] start processing
01:35:27 - cmdstanpy - INFO - Chain [1] done processing
01:35:28 - cmdstanpy - INFO - Chain [1] start processing
01:35:28 - cmdstanpy - INFO - Chain [1] done processing
01:35:28 - cmdstanpy - INFO - Chain [1] start processing
01:35:28 - cmdstanpy - INFO - Chain [1] done processing
01:35:28 - cmdstanpy - INFO - Chain [1] start processing
01:35:28 - cmdstanpy - INFO - Chain [1] done processing
01:35:28 - cmdstanpy - INFO - Chain [1] start processing
01:35:29 - cmdstanpy - INFO - Chain [1] done processing
01:35:29 - cmdstanpy - INFO - Chain [1] start processing
01:35:29 - cmdstanpy - INFO - Chain [1] done processing
01:35:29 - cmdstanpy - INFO - Chain [1] start processing
01:35:29 - cmdstanpy - INFO - Chain [1] done processing
01:35:29 - cmdstanpy - INFO - Chain [1] start processing
01:35:29 - cmdstanpy - INFO - Chain [1] done processing
01:35:30 - cmdstanpy - INFO - Chain [1] start processing
01:35:30 - cmdstanpy - INFO - Chain [1] done processing
01:35:30 - cmdstanpy - INFO - Chain [1] start processing
01:35:30 - cmdstanpy - INFO - Chain [1] done processing
01:35:30 - cmdstanpy - INFO - Chain [1] start processing
01:35:30 - cmdstanpy - INFO - Chain [1] done processing
01:35:30 - cmdstanpy - INFO - Chain [1] start processing
01:35:31 - cmdstanpy - INFO - Chain [1] done processing
01:35:31 - cmdstanpy - INFO - Chain [1] start processing
01:35:31 - cmdstanpy - INFO - Chain [1] done processing
01:35:31 - cmdstanpy - INFO - Chain [1] start processing
01:35:31 - cmdstanpy - INFO - Chain [1] done processing
01:35:31 - cmdstanpy - INFO - Chain [1] start processing
01:35:31 - cmdstanpy - INFO - Chain [1] done processing
01:35:32 - cmdstanpy - INFO - Chain [1] start processing
01:35:32 - cmdstanpy - INFO - Chain [1] done processing
01:35:32 - cmdstanpy - INFO - Chain [1] start processing
01:35:32 - cmdstanpy - INFO - Chain [1] done processing
01:35:32 - cmdstanpy - INFO - Chain [1] start processing
01:35:32 - cmdstanpy - INFO - Chain [1] done processing
01:35:32 - cmdstanpy - INFO - Chain [1] start processing
01:35:33 - cmdstanpy - INFO - Chain [1] done processing
01:35:33 - cmdstanpy - INFO - Chain [1] start processing
01:35:33 - cmdstanpy - INFO - Chain [1] done processing
01:35:33 - cmdstanpy - INFO - Chain [1] start processing
01:35:33 - cmdstanpy - INFO - Chain [1] done processing
01:35:33 - cmdstanpy - INFO - Chain [1] start processing
01:35:33 - cmdstanpy - INFO - Chain [1] done processing
01:35:34 - cmdstanpy - INFO - Chain [1] start processing
01:35:34 - cmdstanpy - INFO - Chain [1] done processing
01:35:34 - cmdstanpy - INFO - Chain [1] start processing
01:35:34 - cmdstanpy - INFO - Chain [1] done processing
 33%|███▎      | 2/6 [01:03<01:52, 28.06s/it]01:35:34 - cmdstanpy - INFO - Chain [1] start processing
01:35:34 - cmdstanpy - INFO - Chain [1] done processing
01:35:35 - cmdstanpy - INFO - Chain [1] start processing
01:35:35 - cmdstanpy - INFO - Chain [1] done processing
01:35:35 - cmdstanpy - INFO - Chain [1] start processing
01:35:35 - cmdstanpy - INFO - Chain [1] done processing
01:35:35 - cmdstanpy - INFO - Chain [1] start processing
01:35:35 - cmdstanpy - INFO - Chain [1] done processing
01:35:35 - cmdstanpy - INFO - Chain [1] start processing
01:35:35 - cmdstanpy - INFO - Chain [1] done processing
01:35:36 - cmdstanpy - INFO - Chain [1] start processing
01:35:36 - cmdstanpy - INFO - Chain [1] done processing
01:35:36 - cmdstanpy - INFO - Chain [1] start processing
01:35:36 - cmdstanpy - INFO - Chain [1] done processing
01:35:36 - cmdstanpy - INFO - Chain [1] start processing
01:35:36 - cmdstanpy - INFO - Chain [1] done processing
01:35:37 - cmdstanpy - INFO - Chain [1] start processing
01:35:37 - cmdstanpy - INFO - Chain [1] done processing
01:35:37 - cmdstanpy - INFO - Chain [1] start processing
01:35:37 - cmdstanpy - INFO - Chain [1] done processing
01:35:37 - cmdstanpy - INFO - Chain [1] start processing
01:35:37 - cmdstanpy - INFO - Chain [1] done processing
01:35:37 - cmdstanpy - INFO - Chain [1] start processing
01:35:37 - cmdstanpy - INFO - Chain [1] done processing
01:35:38 - cmdstanpy - INFO - Chain [1] start processing
01:35:38 - cmdstanpy - INFO - Chain [1] done processing
01:35:38 - cmdstanpy - INFO - Chain [1] start processing
01:35:38 - cmdstanpy - INFO - Chain [1] done processing
01:35:38 - cmdstanpy - INFO - Chain [1] start processing
01:35:38 - cmdstanpy - INFO - Chain [1] done processing
01:35:39 - cmdstanpy - INFO - Chain [1] start processing
01:35:39 - cmdstanpy - INFO - Chain [1] done processing
01:35:39 - cmdstanpy - INFO - Chain [1] start processing
01:35:39 - cmdstanpy - INFO - Chain [1] done processing
01:35:39 - cmdstanpy - INFO - Chain [1] start processing
01:35:39 - cmdstanpy - INFO - Chain [1] done processing
01:35:39 - cmdstanpy - INFO - Chain [1] start processing
01:35:39 - cmdstanpy - INFO - Chain [1] done processing
01:35:40 - cmdstanpy - INFO - Chain [1] start processing
01:35:40 - cmdstanpy - INFO - Chain [1] done processing
01:35:40 - cmdstanpy - INFO - Chain [1] start processing
01:35:40 - cmdstanpy - INFO - Chain [1] done processing
01:35:40 - cmdstanpy - INFO - Chain [1] start processing
01:35:41 - cmdstanpy - INFO - Chain [1] done processing
01:35:41 - cmdstanpy - INFO - Chain [1] start processing
01:35:41 - cmdstanpy - INFO - Chain [1] done processing
01:35:41 - cmdstanpy - INFO - Chain [1] start processing
01:35:42 - cmdstanpy - INFO - Chain [1] done processing
01:35:42 - cmdstanpy - INFO - Chain [1] start processing
01:35:42 - cmdstanpy - INFO - Chain [1] done processing
01:35:42 - cmdstanpy - INFO - Chain [1] start processing
01:35:42 - cmdstanpy - INFO - Chain [1] done processing
01:35:42 - cmdstanpy - INFO - Chain [1] start processing
01:35:43 - cmdstanpy - INFO - Chain [1] done processing
01:35:43 - cmdstanpy - INFO - Chain [1] start processing
01:35:43 - cmdstanpy - INFO - Chain [1] done processing
01:35:44 - cmdstanpy - INFO - Chain [1] start processing
01:35:44 - cmdstanpy - INFO - Chain [1] done processing
01:35:44 - cmdstanpy - INFO - Chain [1] start processing
01:35:44 - cmdstanpy - INFO - Chain [1] done processing
01:35:44 - cmdstanpy - INFO - Chain [1] start processing
01:35:45 - cmdstanpy - INFO - Chain [1] done processing
01:35:45 - cmdstanpy - INFO - Chain [1] start processing
01:35:45 - cmdstanpy - INFO - Chain [1] done processing
01:35:46 - cmdstanpy - INFO - Chain [1] start processing
01:35:46 - cmdstanpy - INFO - Chain [1] done processing
01:35:46 - cmdstanpy - INFO - Chain [1] start processing
01:35:46 - cmdstanpy - INFO - Chain [1] done processing
01:35:46 - cmdstanpy - INFO - Chain [1] start processing
01:35:47 - cmdstanpy - INFO - Chain [1] done processing
01:35:47 - cmdstanpy - INFO - Chain [1] start processing
01:35:47 - cmdstanpy - INFO - Chain [1] done processing
01:35:48 - cmdstanpy - INFO - Chain [1] start processing
01:35:48 - cmdstanpy - INFO - Chain [1] done processing
01:35:48 - cmdstanpy - INFO - Chain [1] start processing
01:35:48 - cmdstanpy - INFO - Chain [1] done processing
01:35:48 - cmdstanpy - INFO - Chain [1] start processing
01:35:49 - cmdstanpy - INFO - Chain [1] done processing
01:35:49 - cmdstanpy - INFO - Chain [1] start processing
01:35:50 - cmdstanpy - INFO - Chain [1] done processing
 50%|█████     | 3/6 [01:18<01:06, 22.32s/it]01:35:50 - cmdstanpy - INFO - Chain [1] start processing
01:35:50 - cmdstanpy - INFO - Chain [1] done processing
01:35:50 - cmdstanpy - INFO - Chain [1] start processing
01:35:50 - cmdstanpy - INFO - Chain [1] done processing
01:35:50 - cmdstanpy - INFO - Chain [1] start processing
01:35:50 - cmdstanpy - INFO - Chain [1] done processing
01:35:51 - cmdstanpy - INFO - Chain [1] start processing
01:35:51 - cmdstanpy - INFO - Chain [1] done processing
01:35:51 - cmdstanpy - INFO - Chain [1] start processing
01:35:51 - cmdstanpy - INFO - Chain [1] done processing
01:35:51 - cmdstanpy - INFO - Chain [1] start processing
01:35:51 - cmdstanpy - INFO - Chain [1] done processing
01:35:51 - cmdstanpy - INFO - Chain [1] start processing
01:35:52 - cmdstanpy - INFO - Chain [1] done processing
01:35:52 - cmdstanpy - INFO - Chain [1] start processing
01:35:52 - cmdstanpy - INFO - Chain [1] done processing
01:35:52 - cmdstanpy - INFO - Chain [1] start processing
01:35:52 - cmdstanpy - INFO - Chain [1] done processing
01:35:52 - cmdstanpy - INFO - Chain [1] start processing
01:35:52 - cmdstanpy - INFO - Chain [1] done processing
01:35:53 - cmdstanpy - INFO - Chain [1] start processing
01:35:53 - cmdstanpy - INFO - Chain [1] done processing
01:35:53 - cmdstanpy - INFO - Chain [1] start processing
01:35:53 - cmdstanpy - INFO - Chain [1] done processing
01:35:53 - cmdstanpy - INFO - Chain [1] start processing
01:35:53 - cmdstanpy - INFO - Chain [1] done processing
01:35:53 - cmdstanpy - INFO - Chain [1] start processing
01:35:53 - cmdstanpy - INFO - Chain [1] done processing
01:35:54 - cmdstanpy - INFO - Chain [1] start processing
01:35:54 - cmdstanpy - INFO - Chain [1] done processing
01:35:54 - cmdstanpy - INFO - Chain [1] start processing
01:35:54 - cmdstanpy - INFO - Chain [1] done processing
01:35:54 - cmdstanpy - INFO - Chain [1] start processing
01:35:54 - cmdstanpy - INFO - Chain [1] done processing
01:35:55 - cmdstanpy - INFO - Chain [1] start processing
01:35:55 - cmdstanpy - INFO - Chain [1] done processing
01:35:55 - cmdstanpy - INFO - Chain [1] start processing
01:35:55 - cmdstanpy - INFO - Chain [1] done processing
01:35:55 - cmdstanpy - INFO - Chain [1] start processing
01:35:55 - cmdstanpy - INFO - Chain [1] done processing
01:35:55 - cmdstanpy - INFO - Chain [1] start processing
01:35:56 - cmdstanpy - INFO - Chain [1] done processing
01:35:56 - cmdstanpy - INFO - Chain [1] start processing
01:35:56 - cmdstanpy - INFO - Chain [1] done processing
01:35:56 - cmdstanpy - INFO - Chain [1] start processing
01:35:57 - cmdstanpy - INFO - Chain [1] done processing
01:35:57 - cmdstanpy - INFO - Chain [1] start processing
01:35:58 - cmdstanpy - INFO - Chain [1] done processing
01:36:00 - cmdstanpy - INFO - Chain [1] start processing
01:36:00 - cmdstanpy - INFO - Chain [1] done processing
01:36:00 - cmdstanpy - INFO - Chain [1] start processing
01:36:01 - cmdstanpy - INFO - Chain [1] done processing
01:36:01 - cmdstanpy - INFO - Chain [1] start processing
01:36:01 - cmdstanpy - INFO - Chain [1] done processing
01:36:02 - cmdstanpy - INFO - Chain [1] start processing
01:36:02 - cmdstanpy - INFO - Chain [1] done processing
01:36:02 - cmdstanpy - INFO - Chain [1] start processing
01:36:02 - cmdstanpy - INFO - Chain [1] done processing
01:36:03 - cmdstanpy - INFO - Chain [1] start processing
01:36:03 - cmdstanpy - INFO - Chain [1] done processing
01:36:03 - cmdstanpy - INFO - Chain [1] start processing
01:36:04 - cmdstanpy - INFO - Chain [1] done processing
01:36:04 - cmdstanpy - INFO - Chain [1] start processing
01:36:04 - cmdstanpy - INFO - Chain [1] done processing
01:36:05 - cmdstanpy - INFO - Chain [1] start processing
01:36:05 - cmdstanpy - INFO - Chain [1] done processing
01:36:05 - cmdstanpy - INFO - Chain [1] start processing
01:36:05 - cmdstanpy - INFO - Chain [1] done processing
01:36:06 - cmdstanpy - INFO - Chain [1] start processing
01:36:06 - cmdstanpy - INFO - Chain [1] done processing
01:36:06 - cmdstanpy - INFO - Chain [1] start processing
01:36:07 - cmdstanpy - INFO - Chain [1] done processing
01:36:07 - cmdstanpy - INFO - Chain [1] start processing
01:36:07 - cmdstanpy - INFO - Chain [1] done processing
01:36:08 - cmdstanpy - INFO - Chain [1] start processing
01:36:08 - cmdstanpy - INFO - Chain [1] done processing
01:36:08 - cmdstanpy - INFO - Chain [1] start processing
01:36:09 - cmdstanpy - INFO - Chain [1] done processing
01:36:09 - cmdstanpy - INFO - Chain [1] start processing
01:36:10 - cmdstanpy - INFO - Chain [1] done processing
 67%|██████▋   | 4/6 [01:38<00:42, 21.44s/it]01:36:10 - cmdstanpy - INFO - Chain [1] start processing
01:36:10 - cmdstanpy - INFO - Chain [1] done processing
01:36:10 - cmdstanpy - INFO - Chain [1] start processing
01:36:10 - cmdstanpy - INFO - Chain [1] done processing
01:36:10 - cmdstanpy - INFO - Chain [1] start processing
01:36:10 - cmdstanpy - INFO - Chain [1] done processing
01:36:11 - cmdstanpy - INFO - Chain [1] start processing
01:36:11 - cmdstanpy - INFO - Chain [1] done processing
01:36:11 - cmdstanpy - INFO - Chain [1] start processing
01:36:11 - cmdstanpy - INFO - Chain [1] done processing
01:36:11 - cmdstanpy - INFO - Chain [1] start processing
01:36:11 - cmdstanpy - INFO - Chain [1] done processing
01:36:12 - cmdstanpy - INFO - Chain [1] start processing
01:36:12 - cmdstanpy - INFO - Chain [1] done processing
01:36:12 - cmdstanpy - INFO - Chain [1] start processing
01:36:12 - cmdstanpy - INFO - Chain [1] done processing
01:36:12 - cmdstanpy - INFO - Chain [1] start processing
01:36:12 - cmdstanpy - INFO - Chain [1] done processing
01:36:13 - cmdstanpy - INFO - Chain [1] start processing
01:36:13 - cmdstanpy - INFO - Chain [1] done processing
01:36:13 - cmdstanpy - INFO - Chain [1] start processing
01:36:13 - cmdstanpy - INFO - Chain [1] done processing
01:36:13 - cmdstanpy - INFO - Chain [1] start processing
01:36:13 - cmdstanpy - INFO - Chain [1] done processing
01:36:14 - cmdstanpy - INFO - Chain [1] start processing
01:36:14 - cmdstanpy - INFO - Chain [1] done processing
01:36:14 - cmdstanpy - INFO - Chain [1] start processing
01:36:14 - cmdstanpy - INFO - Chain [1] done processing
01:36:14 - cmdstanpy - INFO - Chain [1] start processing
01:36:14 - cmdstanpy - INFO - Chain [1] done processing
01:36:14 - cmdstanpy - INFO - Chain [1] start processing
01:36:15 - cmdstanpy - INFO - Chain [1] done processing
01:36:15 - cmdstanpy - INFO - Chain [1] start processing
01:36:15 - cmdstanpy - INFO - Chain [1] done processing
01:36:15 - cmdstanpy - INFO - Chain [1] start processing
01:36:15 - cmdstanpy - INFO - Chain [1] done processing
01:36:15 - cmdstanpy - INFO - Chain [1] start processing
01:36:15 - cmdstanpy - INFO - Chain [1] done processing
01:36:16 - cmdstanpy - INFO - Chain [1] start processing
01:36:16 - cmdstanpy - INFO - Chain [1] done processing
01:36:16 - cmdstanpy - INFO - Chain [1] start processing
01:36:16 - cmdstanpy - INFO - Chain [1] done processing
01:36:16 - cmdstanpy - INFO - Chain [1] start processing
01:36:17 - cmdstanpy - INFO - Chain [1] done processing
01:36:17 - cmdstanpy - INFO - Chain [1] start processing
01:36:17 - cmdstanpy - INFO - Chain [1] done processing
01:36:17 - cmdstanpy - INFO - Chain [1] start processing
01:36:18 - cmdstanpy - INFO - Chain [1] done processing
01:36:18 - cmdstanpy - INFO - Chain [1] start processing
01:36:18 - cmdstanpy - INFO - Chain [1] done processing
01:36:18 - cmdstanpy - INFO - Chain [1] start processing
01:36:19 - cmdstanpy - INFO - Chain [1] done processing
01:36:19 - cmdstanpy - INFO - Chain [1] start processing
01:36:20 - cmdstanpy - INFO - Chain [1] done processing
01:36:20 - cmdstanpy - INFO - Chain [1] start processing
01:36:20 - cmdstanpy - INFO - Chain [1] done processing
01:36:20 - cmdstanpy - INFO - Chain [1] start processing
01:36:21 - cmdstanpy - INFO - Chain [1] done processing
01:36:21 - cmdstanpy - INFO - Chain [1] start processing
01:36:21 - cmdstanpy - INFO - Chain [1] done processing
01:36:21 - cmdstanpy - INFO - Chain [1] start processing
01:36:22 - cmdstanpy - INFO - Chain [1] done processing
01:36:22 - cmdstanpy - INFO - Chain [1] start processing
01:36:23 - cmdstanpy - INFO - Chain [1] done processing
01:36:23 - cmdstanpy - INFO - Chain [1] start processing
01:36:23 - cmdstanpy - INFO - Chain [1] done processing
01:36:23 - cmdstanpy - INFO - Chain [1] start processing
01:36:24 - cmdstanpy - INFO - Chain [1] done processing
01:36:24 - cmdstanpy - INFO - Chain [1] start processing
01:36:24 - cmdstanpy - INFO - Chain [1] done processing
01:36:25 - cmdstanpy - INFO - Chain [1] start processing
01:36:25 - cmdstanpy - INFO - Chain [1] done processing
01:36:25 - cmdstanpy - INFO - Chain [1] start processing
01:36:26 - cmdstanpy - INFO - Chain [1] done processing
01:36:26 - cmdstanpy - INFO - Chain [1] start processing
01:36:26 - cmdstanpy - INFO - Chain [1] done processing
01:36:27 - cmdstanpy - INFO - Chain [1] start processing
01:36:27 - cmdstanpy - INFO - Chain [1] done processing
01:36:27 - cmdstanpy - INFO - Chain [1] start processing
01:36:28 - cmdstanpy - INFO - Chain [1] done processing
 83%|████████▎ | 5/6 [01:57<00:20, 20.31s/it]01:36:28 - cmdstanpy - INFO - Chain [1] start processing
01:36:28 - cmdstanpy - INFO - Chain [1] done processing
01:36:28 - cmdstanpy - INFO - Chain [1] start processing
01:36:28 - cmdstanpy - INFO - Chain [1] done processing
01:36:29 - cmdstanpy - INFO - Chain [1] start processing
01:36:29 - cmdstanpy - INFO - Chain [1] done processing
01:36:29 - cmdstanpy - INFO - Chain [1] start processing
01:36:29 - cmdstanpy - INFO - Chain [1] done processing
01:36:29 - cmdstanpy - INFO - Chain [1] start processing
01:36:29 - cmdstanpy - INFO - Chain [1] done processing
01:36:30 - cmdstanpy - INFO - Chain [1] start processing
01:36:30 - cmdstanpy - INFO - Chain [1] done processing
01:36:30 - cmdstanpy - INFO - Chain [1] start processing
01:36:30 - cmdstanpy - INFO - Chain [1] done processing
01:36:30 - cmdstanpy - INFO - Chain [1] start processing
01:36:30 - cmdstanpy - INFO - Chain [1] done processing
01:36:31 - cmdstanpy - INFO - Chain [1] start processing
01:36:31 - cmdstanpy - INFO - Chain [1] done processing
01:36:31 - cmdstanpy - INFO - Chain [1] start processing
01:36:31 - cmdstanpy - INFO - Chain [1] done processing
01:36:31 - cmdstanpy - INFO - Chain [1] start processing
01:36:31 - cmdstanpy - INFO - Chain [1] done processing
01:36:32 - cmdstanpy - INFO - Chain [1] start processing
01:36:32 - cmdstanpy - INFO - Chain [1] done processing
01:36:32 - cmdstanpy - INFO - Chain [1] start processing
01:36:32 - cmdstanpy - INFO - Chain [1] done processing
01:36:32 - cmdstanpy - INFO - Chain [1] start processing
01:36:32 - cmdstanpy - INFO - Chain [1] done processing
01:36:32 - cmdstanpy - INFO - Chain [1] start processing
01:36:33 - cmdstanpy - INFO - Chain [1] done processing
01:36:33 - cmdstanpy - INFO - Chain [1] start processing
01:36:33 - cmdstanpy - INFO - Chain [1] done processing
01:36:33 - cmdstanpy - INFO - Chain [1] start processing
01:36:33 - cmdstanpy - INFO - Chain [1] done processing
01:36:33 - cmdstanpy - INFO - Chain [1] start processing
01:36:33 - cmdstanpy - INFO - Chain [1] done processing
01:36:34 - cmdstanpy - INFO - Chain [1] start processing
01:36:34 - cmdstanpy - INFO - Chain [1] done processing
01:36:34 - cmdstanpy - INFO - Chain [1] start processing
01:36:34 - cmdstanpy - INFO - Chain [1] done processing
01:36:34 - cmdstanpy - INFO - Chain [1] start processing
01:36:34 - cmdstanpy - INFO - Chain [1] done processing
01:36:35 - cmdstanpy - INFO - Chain [1] start processing
01:36:35 - cmdstanpy - INFO - Chain [1] done processing
01:36:35 - cmdstanpy - INFO - Chain [1] start processing
01:36:36 - cmdstanpy - INFO - Chain [1] done processing
01:36:36 - cmdstanpy - INFO - Chain [1] start processing
01:36:36 - cmdstanpy - INFO - Chain [1] done processing
01:36:36 - cmdstanpy - INFO - Chain [1] start processing
01:36:37 - cmdstanpy - INFO - Chain [1] done processing
01:36:37 - cmdstanpy - INFO - Chain [1] start processing
01:36:37 - cmdstanpy - INFO - Chain [1] done processing
01:36:37 - cmdstanpy - INFO - Chain [1] start processing
01:36:38 - cmdstanpy - INFO - Chain [1] done processing
01:36:38 - cmdstanpy - INFO - Chain [1] start processing
01:36:39 - cmdstanpy - INFO - Chain [1] done processing
01:36:39 - cmdstanpy - INFO - Chain [1] start processing
01:36:39 - cmdstanpy - INFO - Chain [1] done processing
01:36:39 - cmdstanpy - INFO - Chain [1] start processing
01:36:40 - cmdstanpy - INFO - Chain [1] done processing
01:36:40 - cmdstanpy - INFO - Chain [1] start processing
01:36:40 - cmdstanpy - INFO - Chain [1] done processing
01:36:40 - cmdstanpy - INFO - Chain [1] start processing
01:36:41 - cmdstanpy - INFO - Chain [1] done processing
01:36:41 - cmdstanpy - INFO - Chain [1] start processing
01:36:41 - cmdstanpy - INFO - Chain [1] done processing
01:36:41 - cmdstanpy - INFO - Chain [1] start processing
01:36:42 - cmdstanpy - INFO - Chain [1] done processing
01:36:42 - cmdstanpy - INFO - Chain [1] start processing
01:36:43 - cmdstanpy - INFO - Chain [1] done processing
01:36:43 - cmdstanpy - INFO - Chain [1] start processing
01:36:44 - cmdstanpy - INFO - Chain [1] done processing
01:36:44 - cmdstanpy - INFO - Chain [1] start processing
01:36:44 - cmdstanpy - INFO - Chain [1] done processing
01:36:44 - cmdstanpy - INFO - Chain [1] start processing
01:36:45 - cmdstanpy - INFO - Chain [1] done processing
01:36:45 - cmdstanpy - INFO - Chain [1] start processing
01:36:45 - cmdstanpy - INFO - Chain [1] done processing
01:36:46 - cmdstanpy - INFO - Chain [1] start processing
01:36:46 - cmdstanpy - INFO - Chain [1] done processing
100%|██████████| 6/6 [02:15<00:00, 22.58s/it]
     changepoint_prior_scale seasonality_mode  n_changepoints  forier_id  \
147                    0.500   multiplicative              60         20   
183                    0.800   multiplicative              50         20   
143                    0.500   multiplicative              50         20   
193                    0.800   multiplicative              80         10   
182                    0.800   multiplicative              50         15   
..                       ...              ...             ...        ...   
17                     0.001         additive              90         10   
13                     0.001         additive              80         10   
9                      0.001         additive              70         10   
5                      0.001         additive              60         10   
1                      0.001         additive              50         10   

     relative_error_train%  relative_rmse_test_%  
147              14.631177             48.962816  
183              14.380187             49.050205  
143              14.628050             49.124727  
193              14.561205             49.147330  
182              14.413348             49.172578  
..                     ...                   ...  
17               24.644042            176.659975  
13               24.644099            176.664969  
9                24.644324            176.686056  
5                24.644508            176.703124  
1                24.644568            176.708461  

[240 rows x 6 columns]

أفضل نموذج

معاملات افضل نموذج هي : changepoint_prior_scale 0.8 seasonality_mode multiplicative n_changepoints 90 forier_id 15 relative_error_train% 15.571814 relative_rmse_test_% 63.535393

In [ ]:
best_cp = 0.8
best_mode = 'multiplicative'
best_n_changepoint=90
best_f=15
model = Prophet(weekly_seasonality=False,changepoint_prior_scale=best_cp, seasonality_mode=best_mode,n_changepoints=best_n_changepoint)
model.add_seasonality(name='weekly', period=7, fourier_order=f)
model.fit(df_prophet)

future = model.make_future_dataframe(periods=10)
forecast = model.predict(future)

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=df_prophet['ds'],
    y=df_prophet['y'],
    mode='markers',
    name='Original Data',
    marker=dict(color='blue')
))

fig.add_trace(go.Scatter(
    x=forecast['ds'],
    y=forecast['yhat'],
    mode='lines',
    name='Forecast',
    line=dict(color='red')
))

# fig.add_trace(go.Scatter(
#     x=pd.concat([forecast['ds'], forecast['ds'][::-1]]),
#     y=pd.concat([forecast['yhat_upper'], forecast['yhat_lower'][::-1]]),
#     fill='toself',
#     fillcolor='rgba(255,0,0,0.2)',
#     line=dict(color='rgba(255,255,255,0)'),
#     hoverinfo="skip",
#     showlegend=True,
#     name='Confidence Interval'
# ))

# fig.update_layout(
#     title=f'The best Forecast with Tuned Parameters (CP={best_cp}, Mode={best_mode})',
#     xaxis_title='Date',
#     yaxis_title='Value',
#     template='plotly_white'
# )

# fig.show()
image_path = r"C:\Users\ASUS\OneDrive\Desktop\images\Screenshot 2025-06-20 192921.png"
image = Image.open(image_path)
display(image)
17:14:36 - cmdstanpy - INFO - Chain [1] start processing
17:14:36 - cmdstanpy - INFO - Chain [1] done processing
No description has been provided for this image
In [ ]:
metrics_results = []

for cp in tqdm(changepoint_values):
    for mode in seasonality_modes:
        model = Prophet(weekly_seasonality=False,changepoint_prior_scale=cp, seasonality_mode=mode,n_changepoints=n)
        model.add_seasonality(name='weekly', period=7, fourier_order=f)

        model.fit(train)
        future = model.make_future_dataframe(periods=10)
        forecast = model.predict(future)

        y_pred = forecast[['ds', 'yhat']].set_index('ds').loc[test['ds']]
        y_true = test.set_index('ds')['y']

        mae = mean_absolute_error(y_true, y_pred['yhat'])
        rmse = np.sqrt(mean_squared_error(y_true, y_pred['yhat']))
        mape = np.mean(np.abs((y_true - y_pred['yhat']) / y_true)) * 100
        r2 = r2_score(y_true, y_pred['yhat'])

        mean_y = y_true.mean()
        mae_pct = (mae / mean_y) * 100
        rmse_pct = (rmse / mean_y) * 100
        r2_pct = r2 * 100

        metrics_results.append({
            'model': f'Prophet_cp={cp}_mode={mode}',
            'MAE (%)': mae_pct,
            'RMSE (%)': rmse_pct,
            'MAPE (%)': mape,
            'R2 Score (%)': r2_pct
        })

y_true_baseline = test['y'].values
y_pred_baseline = test['y'].shift(1).fillna(method='bfill').values

baseline_mae = mean_absolute_error(y_true_baseline, y_pred_baseline)
baseline_rmse = np.sqrt(mean_squared_error(y_true_baseline, y_pred_baseline))
baseline_mape = np.mean(np.abs((y_true_baseline - y_pred_baseline) / y_true_baseline)) * 100
baseline_r2 = r2_score(y_true_baseline, y_pred_baseline)

mean_y_baseline = y_true_baseline.mean()
baseline_mae_pct = (baseline_mae / mean_y_baseline) * 100
baseline_rmse_pct = (baseline_rmse / mean_y_baseline) * 100
baseline_r2_pct = baseline_r2 * 100

metrics_results.append({
    'model': 'Baseline (Previous Day)',
    'MAE (%)': baseline_mae_pct,
    'RMSE (%)': baseline_rmse_pct,
    'MAPE (%)': baseline_mape,
    'R2 Score (%)': baseline_r2_pct
})

df_all_results = pd.DataFrame(metrics_results).sort_values(by='RMSE (%)')
print(df_all_results)
  0%|          | 0/6 [00:00<?, ?it/s]01:37:01 - cmdstanpy - INFO - Chain [1] start processing
01:37:01 - cmdstanpy - INFO - Chain [1] done processing
01:37:01 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:37:01 - cmdstanpy - INFO - Chain [1] start processing
01:37:03 - cmdstanpy - INFO - Chain [1] done processing
01:37:03 - cmdstanpy - INFO - Chain [1] start processing
01:37:03 - cmdstanpy - INFO - Chain [1] done processing
01:37:03 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
01:37:03 - cmdstanpy - INFO - Chain [1] start processing
01:37:05 - cmdstanpy - INFO - Chain [1] done processing
 17%|█▋        | 1/6 [00:04<00:22,  4.41s/it]01:37:06 - cmdstanpy - INFO - Chain [1] start processing
01:37:06 - cmdstanpy - INFO - Chain [1] done processing
01:37:06 - cmdstanpy - INFO - Chain [1] start processing
01:37:06 - cmdstanpy - INFO - Chain [1] done processing
 33%|███▎      | 2/6 [00:04<00:08,  2.14s/it]01:37:06 - cmdstanpy - INFO - Chain [1] start processing
01:37:06 - cmdstanpy - INFO - Chain [1] done processing
01:37:06 - cmdstanpy - INFO - Chain [1] start processing
01:37:07 - cmdstanpy - INFO - Chain [1] done processing
 50%|█████     | 3/6 [00:06<00:04,  1.66s/it]01:37:07 - cmdstanpy - INFO - Chain [1] start processing
01:37:07 - cmdstanpy - INFO - Chain [1] done processing
01:37:07 - cmdstanpy - INFO - Chain [1] start processing
01:37:08 - cmdstanpy - INFO - Chain [1] done processing
 67%|██████▋   | 4/6 [00:07<00:02,  1.41s/it]01:37:08 - cmdstanpy - INFO - Chain [1] start processing
01:37:08 - cmdstanpy - INFO - Chain [1] done processing
01:37:09 - cmdstanpy - INFO - Chain [1] start processing
01:37:09 - cmdstanpy - INFO - Chain [1] done processing
 83%|████████▎ | 5/6 [00:08<00:01,  1.30s/it]01:37:09 - cmdstanpy - INFO - Chain [1] start processing
01:37:09 - cmdstanpy - INFO - Chain [1] done processing
01:37:10 - cmdstanpy - INFO - Chain [1] start processing
01:37:10 - cmdstanpy - INFO - Chain [1] done processing
100%|██████████| 6/6 [00:09<00:00,  1.55s/it]
                                   model     MAE (%)    RMSE (%)    MAPE (%)  \
12               Baseline (Previous Day)   25.717051   31.229423   26.646085   
9     Prophet_cp=0.8_mode=multiplicative   44.241594   49.186542   55.134665   
7     Prophet_cp=0.5_mode=multiplicative   44.356000   49.353379   55.339817   
11    Prophet_cp=0.9_mode=multiplicative   44.354207   49.355102   55.337866   
5     Prophet_cp=0.3_mode=multiplicative   44.743061   49.980824   56.081909   
10          Prophet_cp=0.9_mode=additive   48.694533   56.091710   62.677341   
8           Prophet_cp=0.8_mode=additive   49.080165   56.539679   63.235939   
3     Prophet_cp=0.1_mode=multiplicative   50.864891   58.231680   65.737525   
6           Prophet_cp=0.5_mode=additive   50.552220   58.296615   65.372304   
4           Prophet_cp=0.3_mode=additive   53.667848   61.743497   69.612956   
2           Prophet_cp=0.1_mode=additive   62.791544   70.621015   80.886917   
1   Prophet_cp=0.001_mode=multiplicative  166.698878  170.913082  198.125518   
0         Prophet_cp=0.001_mode=additive  169.286963  173.184711  200.656321   

    R2 Score (%)  
12     13.208652  
9    -115.298549  
7    -116.761575  
11   -116.776708  
5    -122.308124  
10   -179.992161  
8    -184.482256  
3    -201.763809  
6    -202.437191  
4    -239.258676  
2    -343.829766  
1   -2499.555676  
0   -2569.116997  

In [180]:
forecast
Out[180]:
ds trend yhat_lower yhat_upper trend_lower trend_upper multiplicative_terms multiplicative_terms_lower multiplicative_terms_upper weekly weekly_lower weekly_upper additive_terms additive_terms_lower additive_terms_upper yhat
0 2024-01-01 2343.491624 13747.085255 37305.805023 2343.491624 2343.491624 10.091589 10.091589 10.091589 10.091589 10.091589 10.091589 0.0 0.0 0.0 25993.046582
1 2024-01-02 2381.146574 17412.118634 41091.531163 2381.146574 2381.146574 11.004463 11.004463 11.004463 11.004463 11.004463 11.004463 0.0 0.0 0.0 28584.386158
2 2024-01-03 2418.801523 16319.582614 40903.772811 2418.801523 2418.801523 10.927195 10.927195 10.927195 10.927195 10.927195 10.927195 0.0 0.0 0.0 28849.516759
3 2024-01-04 2456.456472 18439.992842 41449.626941 2456.456472 2456.456472 11.160155 11.160155 11.160155 11.160155 11.160155 11.160155 0.0 0.0 0.0 29870.892428
4 2024-01-05 2494.183593 18256.230975 41575.668321 2494.183593 2494.183593 10.952976 10.952976 10.952976 10.952976 10.952976 10.952976 0.0 0.0 0.0 29812.915671
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
361 2024-12-27 3445.907616 28950.375190 54136.999177 3151.000802 3741.103629 10.952976 10.952976 10.952976 10.952976 10.952976 10.952976 0.0 0.0 0.0 41188.849721
362 2024-12-28 3384.846925 28599.053304 54902.341223 2983.054230 3772.332049 11.390827 11.390827 11.390827 11.390827 11.390827 11.390827 0.0 0.0 0.0 41941.052968
363 2024-12-29 3323.786234 24862.432060 51321.591794 2820.367000 3803.296743 10.436095 10.436095 10.436095 10.436095 10.436095 10.436095 0.0 0.0 0.0 38011.134463
364 2024-12-30 3262.725543 22944.953777 49364.204045 2639.014316 3835.014011 10.091589 10.091589 10.091589 10.091589 10.091589 10.091589 0.0 0.0 0.0 36188.811664
365 2024-12-31 3201.664853 23346.908402 52436.961865 2447.545392 3890.554549 11.004463 11.004463 11.004463 11.004463 11.004463 11.004463 0.0 0.0 0.0 38434.267555

366 rows × 16 columns

b. التحليل العام لانماط الاستخدام

b.1

In [181]:
bins = [1,2 ,3,3.95,4, 5, 7, 10, 15, 30, 60, 120]
labels = [
    '1–2 $',
    '2–3 $',
    '3–3.95 $',
     '3.95–4 $',
    '4–5 $',
    '5–7 $',
    '7–10 $',
    '10–15 $',
    '15–30 $',
    '30–60 $',
    '1–2 hr (60–120 $)',

]

df['total_category'] = pd.cut(df['total_cost'], bins=bins, labels=labels, right=False)

print(df['total_category'].value_counts())
total_category
3.95–4 $             3011977
4–5 $                1147289
1–2 $                 717707
2–3 $                 677682
3–3.95 $              304753
5–7 $                 129323
7–10 $                 60219
10–15 $                29250
15–30 $                15738
1–2 hr (60–120 $)       6010
30–60 $                 3794
Name: count, dtype: int64
In [ ]:
def cost_category(cost):
    if cost < 3:
        return  3
    elif cost < 7:
        return  2 
    else:
        return 1 

df['cost_category'] = df['total_cost'].apply(cost_category)
In [183]:
locations_df['NAME'] = locations_df['NAME'].str.strip().str.lower()
df['start_station_name'] = df['start_station_name'].str.strip().str.lower()
In [184]:
capacity_bin_map = locations_df.set_index('NAME')['CAPACITY_BIN'].to_dict()

df['start_capacity_bin'] = df['start_station_name'].map(capacity_bin_map)
df['start_capacity_bin']
Out[184]:
0           Small
1           Small
2           Small
3           Small
4           Small
            ...  
6103737    Medium
6103738    Medium
6103739    Medium
6103740    Medium
6103741    Medium
Name: start_capacity_bin, Length: 6103742, dtype: object
In [191]:
df['start_date'] = pd.to_datetime(df['started_at'].dt.date)

df['start_date'] = pd.to_datetime(df['start_date'])
weather_df['date'] = pd.to_datetime(weather_df['date'])
df = df.drop(columns=[col for col in ['date', 'weather_bin'] if col in df.columns])

df_merge = df.merge(
    weather_df[['date', 'temp', 'humidity', 'windspeed', 'weather_bin']],
    how='left',
    left_on='start_date',
    right_on='date'
)
df_merge['start_capacity_bin'] = df_merge['start_station_name'].map(capacity_bin_map)

df_sample = df_merge.dropna(subset=[
    'rideable_type',
    'member_casual',
    'cost_category',
    'temp',
    'humidity',
    'windspeed',
    'weather_bin',
    'distance_to_business_area_x',
    'start_distance_to_shuttle_m',
    'start_distance_to_metro_m',
  
    'start_capacity_bin'
,
'start_geohash',
'end_geohash'
])

samples_per_group = 500

grouped_sample = (
    df_sample
    .groupby([
        'rideable_type',
        'member_casual',
        'weather_bin',
    ], group_keys=False)
    .apply(lambda x: x.sample(min(len(x), samples_per_group), random_state=42))
)

print(" حجم العينة النهائي:", grouped_sample.shape[0])
 حجم العينة النهائي: 6000
In [ ]:
numeric_features = [
    'temp', 'humidity', 'windspeed', 'ride_duration_min',
    'cost_category', 'distance_to_business_area_x',
    'start_distance_to_shuttle_m', 'start_distance_to_metro_m'
]

categorical_features = [
    'rideable_type', 'member_casual',
    'weather_bin', 'start_geohash','end_geohash', 'start_capacity_bin'
]

all_features = numeric_features + categorical_features
grouped_sample = grouped_sample.dropna(subset=all_features)

grouped_sample[categorical_features] = grouped_sample[categorical_features].astype(str)

preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), numeric_features),
    ('cat', OneHotEncoder(drop='first', sparse_output=False), categorical_features)
])

X_processed = preprocessor.fit_transform(grouped_sample)
In [ ]:
k = 3
kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
kmeans_labels = kmeans.fit_predict(X_processed)
sil_score = silhouette_score(X_processed, kmeans_labels)
print(f"KMeans - Silhouette Score = {sil_score:.4f}")
grouped_sample['kmeans_cluster'] = kmeans_labels
KMeans - Silhouette Score = 0.1301
In [ ]:
k = 3
kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
kmeans_labels = kmeans.fit_predict(X_processed)
algo_name = "KMeans"

sil_score = silhouette_score(X_processed, kmeans_labels)
print(f"KMeans - Silhouette Score = {sil_score:.4f}")

grouped_sample['kmeans_cluster'] = kmeans_labels
print(f"\n--- Cluster Description for {algo_name} ---")

print("\nAverage Values of Numerical Features per Cluster:")
numerical_summary = grouped_sample.groupby('kmeans_cluster')[numeric_features].mean()
print(numerical_summary)
print("\nDistribution of Categorical Features per Cluster:")
for feature in categorical_features:
    print(f"\nFeature: {feature}")
    cluster_counts = grouped_sample.groupby(['kmeans_cluster', feature]).size().unstack(fill_value=0)
    cluster_percentages = cluster_counts.apply(lambda x: x / x.sum(), axis=1) * 100
    print(cluster_percentages.round(2)) 

print("\nCluster Sizes:")
print(grouped_sample['kmeans_cluster'].value_counts().sort_index())

pca = PCA(n_components=2)
components = pca.fit_transform(X_processed)
pca_df = pd.DataFrame(data = components, columns = ['PC1', 'PC2'])
pca_df['kmeans_cluster'] = kmeans_labels
pca_df['kmeans_cluster'] = pca_df['kmeans_cluster'].astype(str) 

fig = px.scatter(
    pca_df,
    x='PC1',
    y='PC2',
    color='kmeans_cluster',
    title='KMeans Clustering (PCA Reduced Dimensions)',
    labels={'PC1': 'Principal Component 1', 'PC2': 'Principal Component 2'},
    hover_data={'PC1':False, 'PC2':False, 'kmeans_cluster':True} 

centroids_pca = pca.transform(kmeans.cluster_centers_)
fig.add_trace(go.Scatter(
    x=centroids_pca[:, 0], y=centroids_pca[:, 1],
    mode='markers',
    marker=dict(size=15, symbol='x', color='black'),
    name='Centroids'
))

#fig.show(renderer="browser")

image_path = r'C:\Users\asus\Desktop\5\2\DM\Homework\نتائج\Screenshot 2025-06-20 185735.png'

image = Image.open(image_path)
display(image)

processed_feature_names = numeric_features[:]
ohe_feature_names = preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)
processed_feature_names.extend(ohe_feature_names)

loadings = pd.DataFrame(pca.components_.T, columns=['PC1', 'PC2'], index=processed_feature_names)
print("\nPCA Loadings (Contribution of Original Features to Principal Components):")
print(loadings.to_string())
KMeans - Silhouette Score = 0.1301

--- Cluster Description for KMeans ---

Average Values of Numerical Features per Cluster:
                     temp   humidity  windspeed  ride_duration_min  \
kmeans_cluster                                                       
0               14.642857  65.783117  20.644156          43.850649   
1               14.171036  64.839036  20.861928          11.192578   
2               14.495582  65.109511  21.502885          16.962084   

                cost_category  distance_to_business_area_x  \
kmeans_cluster                                               
0                    2.506494                     0.230632   
1                    3.000000                     0.016970   
2                    1.967516                     0.014864   

                start_distance_to_shuttle_m  start_distance_to_metro_m  
kmeans_cluster                                                          
0                              21578.148022                1033.338398  
1                               1170.632474                 125.137413  
2                                987.248651                 115.293326  

Distribution of Categorical Features per Cluster:

Feature: rideable_type
rideable_type   classic_bike  electric_bike
kmeans_cluster                             
0                      74.03          25.97
1                      58.36          41.64
2                      45.01          54.99

Feature: member_casual
member_casual   casual  member
kmeans_cluster                
0                72.73   27.27
1               100.00    0.00
2                22.58   77.42

Feature: weather_bin
weather_bin     Cloudy  Rainy  Sunny
kmeans_cluster                      
0                37.66  29.87  32.47
1                35.33  31.90  32.77
2                32.17  34.17  33.65

Feature: start_geohash
start_geohash   dqbvrzn  dqbvx3p  dqbvx7w  dqbvx96  dqbvxc8  dqbvxe9  dqbvxek  \
kmeans_cluster                                                                  
0                   1.3      2.6      1.3      1.3      2.6      1.3     5.19   
1                   0.0      0.0      0.0      0.0      0.0      0.0     0.00   
2                   0.0      0.0      0.0      0.0      0.0      0.0     0.00   

start_geohash   dqbvxeu  dqbvxfe  dqbvxgd  ...  dqcnj5x  dqcnk9e  dqcnke2  \
kmeans_cluster                             ...                              
0                   1.3      2.6      1.3  ...     0.00      1.3      1.3   
1                   0.0      0.0      0.0  ...     0.05      0.0      0.0   
2                   0.0      0.0      0.0  ...     0.00      0.0      0.0   

start_geohash   dqcnkjn  dqcnndr  dqcnndt  dqcnnsm  dqcns19  dqcns48  dqcq35j  
kmeans_cluster                                                                 
0                   3.9     0.00     0.00     0.00      1.3      1.3      1.3  
1                   0.0     0.00     0.14     0.10      0.0      0.0      0.0  
2                   0.0     0.03     0.03     0.03      0.0      0.0      0.0  

[3 rows x 606 columns]

Feature: end_geohash
end_geohash     dqbvx7w  dqbvxdr  dqbvxe9  dqbvxek  dqbvxeu  dqbvxfe  dqbvxgd  \
kmeans_cluster                                                                  
0                   1.3      1.3      1.3      1.3      1.3      1.3      2.6   
1                   0.0      0.0      0.0      0.0      0.0      0.0      0.0   
2                   0.0      0.0      0.0      0.0      0.0      0.0      0.0   

end_geohash     dqbvxsm  dqbvxsw  dqbvxv0  ...  dqcnk9e  dqcnke2  dqcnksb  \
kmeans_cluster                             ...                              
0                   2.6      1.3      2.6  ...      1.3      1.3      1.3   
1                   0.0      0.0      0.0  ...      0.0      0.0      0.0   
2                   0.0      0.0      0.0  ...      0.0      0.0      0.0   

end_geohash     dqcnn9r  dqcnndt  dqcnnee  dqcnnez  dqcnnsm  dqcns1f  dqcq349  
kmeans_cluster                                                                 
0                   0.0     0.00      0.0     0.00     0.00      1.3      1.3  
1                   0.1     0.00      0.1     0.00     0.05      0.0      0.0  
2                   0.0     0.05      0.0     0.03     0.00      0.0      0.0  

[3 rows x 608 columns]

Feature: start_capacity_bin
start_capacity_bin  Large  Medium  Small
kmeans_cluster                          
0                    0.00   24.68  75.32
1                   14.89   62.22  22.89
2                   17.18   61.72  21.10

Cluster Sizes:
kmeans_cluster
0      77
1    2075
2    3848
Name: count, dtype: int64
No description has been provided for this image
PCA Loadings (Contribution of Original Features to Principal Components):
                                  PC1       PC2
temp                         0.014190  0.383385
humidity                    -0.025208 -0.529767
windspeed                    0.005260  0.625620
ride_duration_min            0.072271  0.250989
cost_category                0.065864 -0.264844
distance_to_business_area_x  0.608527 -0.026624
start_distance_to_shuttle_m  0.621779 -0.025708
start_distance_to_metro_m    0.470708  0.020723
rideable_type_electric_bike -0.014255  0.002430
member_casual_member        -0.041230  0.060262
weather_bin_Rainy           -0.007257 -0.009168
weather_bin_Sunny            0.013008  0.204831
start_geohash_dqbvx3p        0.002388 -0.000364
start_geohash_dqbvx7w        0.001077  0.000072
start_geohash_dqbvx96        0.001233  0.000016
start_geohash_dqbvxc8        0.002127 -0.000408
start_geohash_dqbvxe9        0.001189  0.000014
start_geohash_dqbvxek        0.004823 -0.000160
start_geohash_dqbvxeu        0.001211  0.000230
start_geohash_dqbvxfe        0.002009  0.000190
start_geohash_dqbvxgd        0.001046 -0.000209
start_geohash_dqbvxs5        0.001225 -0.000009
start_geohash_dqbvxsm        0.002451 -0.000089
start_geohash_dqbvxsw        0.002386 -0.000160
start_geohash_dqbvxts        0.002621 -0.000035
start_geohash_dqbvxv0        0.001188 -0.000194
start_geohash_dqbvxzb        0.002805  0.000118
start_geohash_dqchbuk        0.000767 -0.000093
start_geohash_dqchbv1        0.000797 -0.000110
start_geohash_dqchby0        0.000656 -0.000093
start_geohash_dqchbyf        0.000708 -0.000230
start_geohash_dqchtzv        0.000674  0.000129
start_geohash_dqchuch        0.000314  0.000041
start_geohash_dqchv1g        0.000254  0.000097
start_geohash_dqchv37        0.000232  0.000239
start_geohash_dqchv64        0.000241 -0.000244
start_geohash_dqchv8b        0.000254  0.000367
start_geohash_dqchvu3        0.000198 -0.000263
start_geohash_dqchvue        0.000179 -0.000011
start_geohash_dqchvwn        0.000160  0.000147
start_geohash_dqchvy9        0.000140  0.000097
start_geohash_dqchvz1        0.000710  0.000127
start_geohash_dqchvz7        0.000138  0.000323
start_geohash_dqchwpv        0.000493  0.000152
start_geohash_dqchwr8        0.000399 -0.000013
start_geohash_dqchwrg        0.000431 -0.000235
start_geohash_dqchwwg        0.000277 -0.000018
start_geohash_dqchwwn        0.002209 -0.000927
start_geohash_dqchwwy        0.000408 -0.000037
start_geohash_dqchwxr        0.000205 -0.000157
start_geohash_dqchwz3        0.000266 -0.000058
start_geohash_dqchwzs        0.000342  0.000042
start_geohash_dqchwzv        0.000298 -0.000033
start_geohash_dqchxkn        0.000801 -0.000120
start_geohash_dqchxky        0.001547  0.000169
start_geohash_dqchxyb        0.000459 -0.000093
start_geohash_dqchy2q        0.000590 -0.000352
start_geohash_dqchy2w        0.000632  0.000063
start_geohash_dqchy3t        0.000585 -0.000120
start_geohash_dqchy6u        0.000512 -0.000334
start_geohash_dqchy83        0.000483 -0.000102
start_geohash_dqchy87        0.000309 -0.000009
start_geohash_dqchy8m        0.000735 -0.000030
start_geohash_dqchy9g        0.001729 -0.000404
start_geohash_dqchy9t        0.000158 -0.000135
start_geohash_dqchy9y        0.000214  0.000045
start_geohash_dqchyb1        0.001132  0.000665
start_geohash_dqchyb4        0.000761 -0.000040
start_geohash_dqchyc3        0.000537  0.000372
start_geohash_dqchycg        0.000422 -0.000210
start_geohash_dqchydb        0.000492 -0.000098
start_geohash_dqchydx        0.000153 -0.000193
start_geohash_dqchye5        0.001027 -0.000101
start_geohash_dqchyeq        0.000298 -0.000006
start_geohash_dqchyer        0.000191  0.000182
start_geohash_dqchykp        0.000150 -0.000072
start_geohash_dqchymq        0.000303 -0.000236
start_geohash_dqchypz        0.000365  0.000366
start_geohash_dqchyqu        0.000091  0.000072
start_geohash_dqchyr5        0.000086  0.000062
start_geohash_dqchysx        0.000162 -0.000180
start_geohash_dqchytn        0.000582  0.000115
start_geohash_dqchytr        0.000234 -0.000250
start_geohash_dqchyw0        0.000490 -0.000487
start_geohash_dqchyxj        0.000358  0.000133
start_geohash_dqchyxm        0.000361  0.000188
start_geohash_dqchyxs        0.000310 -0.000035
start_geohash_dqchzth        0.000066  0.000151
start_geohash_dqcj0bk        0.000746 -0.000294
start_geohash_dqcj1eu        0.000572  0.000047
start_geohash_dqcj1se        0.001171  0.000037
start_geohash_dqcj1tm        0.001107 -0.000538
start_geohash_dqcj4ey        0.000969 -0.000401
start_geohash_dqcj4s4        0.000500 -0.000121
start_geohash_dqcj4sd        0.000519  0.000010
start_geohash_dqcj4tb        0.000524 -0.000003
start_geohash_dqcj4td        0.000503 -0.000224
start_geohash_dqcj4v8        0.000493 -0.000238
start_geohash_dqcj4wk        0.001835  0.000106
start_geohash_dqcj5j3        0.000428 -0.000130
start_geohash_dqcj5mk        0.000360  0.000050
start_geohash_dqcj5vb        0.001022  0.000476
start_geohash_dqcj5vv        0.000320  0.000185
start_geohash_dqcj5xs        0.000309 -0.000218
start_geohash_dqcj5yg        0.000266 -0.000164
start_geohash_dqcj5ys        0.001128 -0.000275
start_geohash_dqcj5z1        0.000278 -0.000013
start_geohash_dqcj6em        0.000990 -0.000097
start_geohash_dqcj6uz        0.000470  0.000077
start_geohash_dqcj6v1        0.000407 -0.000085
start_geohash_dqcj6v2        0.000842 -0.000364
start_geohash_dqcj6y9        0.000486  0.000037
start_geohash_dqcj6yk        0.000950 -0.000064
start_geohash_dqcj6z6        0.000536 -0.000021
start_geohash_dqcj71s        0.000391 -0.000066
start_geohash_dqcj7jf        0.000377 -0.000203
start_geohash_dqcj845        0.000814 -0.000097
start_geohash_dqcj861        0.001873  0.001364
start_geohash_dqcjhkd        0.000241  0.000119
start_geohash_dqcjhph        0.000511 -0.000297
start_geohash_dqcjhqb        0.001203 -0.000005
start_geohash_dqcjhvj        0.000440  0.000102
start_geohash_dqcjhwf        0.000211  0.000110
start_geohash_dqcjhwp        0.000617 -0.000380
start_geohash_dqcjhzm        0.000271 -0.000071
start_geohash_dqcjj1h        0.000169  0.000125
start_geohash_dqcjj39        0.001022 -0.000039
start_geohash_dqcjj3d        0.000134 -0.000030
start_geohash_dqcjj6p        0.000126 -0.000136
start_geohash_dqcjj7y        0.000149 -0.000031
start_geohash_dqcjj82        0.000054 -0.000044
start_geohash_dqcjj8j        0.000274 -0.000283
start_geohash_dqcjjbh        0.000081 -0.000058
start_geohash_dqcjjdq        0.000188 -0.000084
start_geohash_dqcjjeu        0.000278 -0.000325
start_geohash_dqcjjf9        0.000068 -0.000053
start_geohash_dqcjjfz        0.000065  0.000234
start_geohash_dqcjjgn        0.000162  0.000061
start_geohash_dqcjjjd        0.000252  0.000126
start_geohash_dqcjjjz        0.000128 -0.000050
start_geohash_dqcjjk7        0.000228  0.000237
start_geohash_dqcjjkv        0.000161 -0.000188
start_geohash_dqcjjm2        0.000326  0.000024
start_geohash_dqcjjm5        0.000233 -0.000274
start_geohash_dqcjjmg        0.000830 -0.000134
start_geohash_dqcjjnt        0.000119  0.000165
start_geohash_dqcjjq0        0.001419  0.001528
start_geohash_dqcjjq2        0.000424 -0.000260
start_geohash_dqcjjq6        0.000107 -0.000106
start_geohash_dqcjjqe        0.000560  0.000542
start_geohash_dqcjjqr        0.000649 -0.000412
start_geohash_dqcjjr3        0.000216  0.000180
start_geohash_dqcjjuc        0.000091  0.000053
start_geohash_dqcjjuw        0.000470 -0.000377
start_geohash_dqcjjw9        0.000366 -0.000171
start_geohash_dqcjjwg        0.000491  0.000184
start_geohash_dqcjjwy        0.000675  0.001566
start_geohash_dqcjjxh        0.001032 -0.000241
start_geohash_dqcjjxq        0.000238  0.000017
start_geohash_dqcjjxx        0.000493 -0.000515
start_geohash_dqcjjy0        0.000396 -0.000382
start_geohash_dqcjjy5        0.000273 -0.000554
start_geohash_dqcjjyf        0.000135  0.000103
start_geohash_dqcjjzc        0.000063 -0.000251
start_geohash_dqcjjzh        0.000221 -0.000357
start_geohash_dqcjjzu        0.000229 -0.000127
start_geohash_dqcjjzz        0.000193 -0.000176
start_geohash_dqcjkfm        0.000168 -0.000131
start_geohash_dqcjm1h        0.000326 -0.000226
start_geohash_dqcjm2y        0.000029  0.000101
start_geohash_dqcjm8e        0.000033 -0.000003
start_geohash_dqcjm8x        0.000360 -0.000843
start_geohash_dqcjmb7        0.000094 -0.000049
start_geohash_dqcjmbc        0.000107 -0.000118
start_geohash_dqcjmbg        0.000232 -0.000360
start_geohash_dqcjmbh        0.000221  0.000057
start_geohash_dqcjmbn        0.000020  0.000208
start_geohash_dqcjmcp       -0.000044  0.000202
start_geohash_dqcjmgk       -0.000057  0.000181
start_geohash_dqcjmqq       -0.000029  0.000362
start_geohash_dqcjmss        0.000108 -0.000133
start_geohash_dqcjmu8        0.000078  0.000325
start_geohash_dqcjmze       -0.000002  0.000113
start_geohash_dqcjn05        0.000417 -0.000013
start_geohash_dqcjn2y        0.000045  0.000096
start_geohash_dqcjn3z        0.000295  0.000198
start_geohash_dqcjn56        0.000371  0.000021
start_geohash_dqcjn5t        0.000057 -0.000211
start_geohash_dqcjn6h        0.000432 -0.000282
start_geohash_dqcjn6u        0.000160  0.000121
start_geohash_dqcjn6w        0.000247 -0.000399
start_geohash_dqcjn8j        0.000352 -0.000043
start_geohash_dqcjn8s        0.000174 -0.000013
start_geohash_dqcjn8y        0.000248 -0.000098
start_geohash_dqcjn97        0.000276  0.000101
start_geohash_dqcjn9u        0.000425 -0.000121
start_geohash_dqcjn9w        0.000314 -0.000185
start_geohash_dqcjnbb        0.000093  0.000123
start_geohash_dqcjnd5        0.000459  0.000288
start_geohash_dqcjndf        0.000394  0.000598
start_geohash_dqcjndq        0.000489  0.000196
start_geohash_dqcjne0        0.000099  0.000059
start_geohash_dqcjnem        0.000111 -0.000079
start_geohash_dqcjngj        0.003020  0.000736
start_geohash_dqcjnp7        0.000014 -0.000012
start_geohash_dqcjnpg        0.000031 -0.000322
start_geohash_dqcjnpw        0.000065  0.000160
start_geohash_dqcjnqd        0.000037 -0.000446
start_geohash_dqcjnu4        0.000907  0.000064
start_geohash_dqcjnvz        0.002970  0.001146
start_geohash_dqcjnxq        0.000483  0.002779
start_geohash_dqcjnxy       -0.000960  0.001192
start_geohash_dqcjny8        0.001109  0.000106
start_geohash_dqcjnzq        0.000688  0.001245
start_geohash_dqcjp9n       -0.000020  0.000135
start_geohash_dqcjpdb       -0.000103 -0.000158
start_geohash_dqcjpe6        0.000325  0.000357
start_geohash_dqcjpfw       -0.000010  0.000139
start_geohash_dqcjpjs        0.000608  0.001473
start_geohash_dqcjpjz       -0.000592 -0.000032
start_geohash_dqcjpkf        0.000010  0.000219
start_geohash_dqcjpkt       -0.000250  0.000294
start_geohash_dqcjpm7       -0.000179  0.000180
start_geohash_dqcjpm9       -0.000932  0.000818
start_geohash_dqcjpmm       -0.001267 -0.001730
start_geohash_dqcjpnx       -0.000130  0.000125
start_geohash_dqcjpp6       -0.002074  0.001003
start_geohash_dqcjppj       -0.000120 -0.000329
start_geohash_dqcjppp       -0.000114  0.000632
start_geohash_dqcjppt       -0.000557  0.000777
start_geohash_dqcjpqd       -0.000382  0.000403
start_geohash_dqcjpqj       -0.000958  0.000901
start_geohash_dqcjpqt       -0.000641  0.000181
start_geohash_dqcjpr4       -0.000221 -0.000032
start_geohash_dqcjprj       -0.000510  0.000169
start_geohash_dqcjprv       -0.000701 -0.000433
start_geohash_dqcjps1       -0.000025  0.000127
start_geohash_dqcjps4        0.000045  0.000367
start_geohash_dqcjpsm        0.000451  0.000085
start_geohash_dqcjpt1       -0.000128  0.000588
start_geohash_dqcjptf       -0.000009 -0.000193
start_geohash_dqcjptn       -0.000366  0.000147
start_geohash_dqcjptp       -0.000937 -0.000039
start_geohash_dqcjptq       -0.000427 -0.000534
start_geohash_dqcjptt       -0.001388  0.001092
start_geohash_dqcjpty       -0.000334  0.000338
start_geohash_dqcjpub       -0.000257  0.000238
start_geohash_dqcjpv2       -0.000270  0.000058
start_geohash_dqcjpv3       -0.000515 -0.000092
start_geohash_dqcjpvq       -0.000191 -0.000027
start_geohash_dqcjpvv       -0.000076  0.000238
start_geohash_dqcjpwv       -0.000069  0.000326
start_geohash_dqcjpx0       -0.000556 -0.000301
start_geohash_dqcjpy1        0.000770  0.000595
start_geohash_dqcjpyb       -0.000090 -0.000517
start_geohash_dqcjpyt       -0.000479 -0.001178
start_geohash_dqcjpz0       -0.000467 -0.000183
start_geohash_dqcjpzd       -0.000154  0.000506
start_geohash_dqcjpzh        0.000092  0.000617
start_geohash_dqcjpzt       -0.000292 -0.000153
start_geohash_dqcjq00        0.000041 -0.000107
start_geohash_dqcjq03       -0.000019  0.000149
start_geohash_dqcjq06       -0.000084 -0.000115
start_geohash_dqcjq0q       -0.000051  0.000607
start_geohash_dqcjq0s       -0.000149  0.000185
start_geohash_dqcjq0t        0.000028  0.000238
start_geohash_dqcjq0y       -0.000309  0.000512
start_geohash_dqcjq13       -0.000015 -0.000145
start_geohash_dqcjq1j       -0.000086 -0.000079
start_geohash_dqcjq2c        0.000648  0.000484
start_geohash_dqcjq3z       -0.000034  0.000103
start_geohash_dqcjq4y       -0.000687  0.001442
start_geohash_dqcjq5u       -0.000738  0.001120
start_geohash_dqcjq61       -0.000090  0.000829
start_geohash_dqcjq6d       -0.001133  0.000553
start_geohash_dqcjq6h       -0.000061 -0.001023
start_geohash_dqcjq6n       -0.000240  0.000902
start_geohash_dqcjq7f       -0.000069  0.001178
start_geohash_dqcjq7h       -0.001362 -0.000230
start_geohash_dqcjq8n       -0.000328 -0.000249
start_geohash_dqcjq8y       -0.000434  0.000361
start_geohash_dqcjq91        0.000072  0.000688
start_geohash_dqcjq95       -0.000605 -0.000275
start_geohash_dqcjq9q       -0.000128  0.000617
start_geohash_dqcjq9v       -0.000740 -0.000028
start_geohash_dqcjq9w       -0.000538  0.000418
start_geohash_dqcjqb4       -0.000186  0.000241
start_geohash_dqcjqb6       -0.000060 -0.000077
start_geohash_dqcjqb9       -0.000135  0.000525
start_geohash_dqcjqbb       -0.000285  0.000340
start_geohash_dqcjqbc       -0.000055  0.000137
start_geohash_dqcjqbd       -0.000044  0.000072
start_geohash_dqcjqbk       -0.000175 -0.000207
start_geohash_dqcjqbv       -0.000106 -0.000007
start_geohash_dqcjqc0       -0.000275  0.000670
start_geohash_dqcjqc4       -0.000253  0.000468
start_geohash_dqcjqc8       -0.000871  0.000665
start_geohash_dqcjqcb       -0.000420  0.000019
start_geohash_dqcjqce       -0.000256 -0.000547
start_geohash_dqcjqcf       -0.000450  0.000362
start_geohash_dqcjqcj       -0.000359 -0.000316
start_geohash_dqcjqck       -0.000934  0.000817
start_geohash_dqcjqcy       -0.000463 -0.000560
start_geohash_dqcjqd2       -0.001472 -0.000686
start_geohash_dqcjqd5       -0.001010  0.000283
start_geohash_dqcjqdm       -0.000788 -0.000182
start_geohash_dqcjqdt        0.000035 -0.000391
start_geohash_dqcjqdx       -0.001129  0.000061
start_geohash_dqcjqen       -0.000650 -0.000132
start_geohash_dqcjqf1       -0.000491  0.000406
start_geohash_dqcjqf5       -0.000444  0.000564
start_geohash_dqcjqfk       -0.000864  0.000523
start_geohash_dqcjqfn       -0.000452  0.001392
start_geohash_dqcjqfp       -0.000156  0.000171
start_geohash_dqcjqft       -0.000506  0.001019
start_geohash_dqcjqfy       -0.000115 -0.000786
start_geohash_dqcjqg1       -0.001338 -0.000564
start_geohash_dqcjqgc       -0.001566 -0.000652
start_geohash_dqcjqge       -0.000497 -0.001261
start_geohash_dqcjqgg       -0.000121 -0.000432
start_geohash_dqcjqgn       -0.000486 -0.000592
start_geohash_dqcjqgq       -0.001891  0.000328
start_geohash_dqcjqgw       -0.000709  0.000293
start_geohash_dqcjqgz       -0.000819  0.000824
start_geohash_dqcjqjd       -0.000287 -0.000106
start_geohash_dqcjqjx        0.000056 -0.000532
start_geohash_dqcjqk8       -0.000366  0.000417
start_geohash_dqcjqn0       -0.000066  0.000243
start_geohash_dqcjqp4       -0.000077 -0.000418
start_geohash_dqcjqpv       -0.000259  0.000839
start_geohash_dqcjqtu       -0.000875 -0.000360
start_geohash_dqcjqtv       -0.000369 -0.000218
start_geohash_dqcjqtx       -0.000178  0.000286
start_geohash_dqcjqu3       -0.000377 -0.000105
start_geohash_dqcjquc       -0.000157 -0.000571
start_geohash_dqcjquq       -0.001587 -0.000017
start_geohash_dqcjquu       -0.002438 -0.002077
start_geohash_dqcjqv6       -0.000650  0.001126
start_geohash_dqcjqve       -0.001662 -0.002600
start_geohash_dqcjqvu       -0.001366 -0.000068
start_geohash_dqcjqx1       -0.000545  0.000325
start_geohash_dqcjqx8       -0.000202 -0.000062
start_geohash_dqcjqyd       -0.000289 -0.000189
start_geohash_dqcjqyr       -0.001506 -0.000480
start_geohash_dqcjqyz       -0.000633  0.000078
start_geohash_dqcjqzq       -0.000942 -0.000057
start_geohash_dqcjqzu        0.000031  0.000166
start_geohash_dqcjr04       -0.000760 -0.000279
start_geohash_dqcjr0e       -0.000443 -0.000558
start_geohash_dqcjr0f       -0.000209  0.000040
start_geohash_dqcjr0r       -0.000188  0.000097
start_geohash_dqcjr0z       -0.000380  0.000220
start_geohash_dqcjr15       -0.000642  0.000054
start_geohash_dqcjr16       -0.000470  0.000958
start_geohash_dqcjr17       -0.000232  0.000381
start_geohash_dqcjr19       -0.000587 -0.000593
start_geohash_dqcjr1c       -0.000495  0.000236
start_geohash_dqcjr1n       -0.000928 -0.000197
start_geohash_dqcjr1p       -0.000352 -0.000085
start_geohash_dqcjr1s       -0.000356  0.000135
start_geohash_dqcjr1t       -0.000559  0.000674
start_geohash_dqcjr1z       -0.000449  0.000346
start_geohash_dqcjr22       -0.000279  0.001297
start_geohash_dqcjr29       -0.000345  0.000397
start_geohash_dqcjr2e       -0.000238  0.000122
start_geohash_dqcjr2w       -0.000161 -0.000045
start_geohash_dqcjr30       -0.000177  0.000289
start_geohash_dqcjr33       -0.000681 -0.000494
start_geohash_dqcjr34       -0.000655  0.000037
start_geohash_dqcjr3h       -0.000258  0.000592
start_geohash_dqcjr3r       -0.000193 -0.000101
start_geohash_dqcjr3s       -0.000515  0.000172
start_geohash_dqcjr40       -0.000315  0.000262
start_geohash_dqcjr45       -0.000814  0.001339
start_geohash_dqcjr49       -0.000241  0.000611
start_geohash_dqcjr4d       -0.001234 -0.000158
start_geohash_dqcjr4j       -0.000417 -0.000534
start_geohash_dqcjr4w       -0.001654 -0.001416
start_geohash_dqcjr53       -0.002582 -0.001118
start_geohash_dqcjr54       -0.001748  0.000461
start_geohash_dqcjr5e       -0.000820 -0.000177
start_geohash_dqcjr5g       -0.001926 -0.002713
start_geohash_dqcjr5h       -0.000972  0.001135
start_geohash_dqcjr5n       -0.000909 -0.001229
start_geohash_dqcjr64       -0.001112 -0.000319
start_geohash_dqcjr6b       -0.000762  0.000620
start_geohash_dqcjr6d       -0.001285 -0.001342
start_geohash_dqcjr6h       -0.002063 -0.000186
start_geohash_dqcjr6z       -0.000802  0.000655
start_geohash_dqcjr71       -0.001324  0.000934
start_geohash_dqcjr7c       -0.000624 -0.000381
start_geohash_dqcjr7v       -0.000489  0.000302
start_geohash_dqcjr80       -0.001018 -0.000357
start_geohash_dqcjr8x       -0.001239 -0.000571
start_geohash_dqcjr91       -0.001075 -0.000112
start_geohash_dqcjr95       -0.000724 -0.001546
start_geohash_dqcjr97       -0.000548 -0.000041
start_geohash_dqcjr99       -0.000586  0.000452
start_geohash_dqcjr9n       -0.002591 -0.003382
start_geohash_dqcjr9r       -0.001015 -0.000690
start_geohash_dqcjr9u       -0.000309 -0.000781
start_geohash_dqcjr9y       -0.001853 -0.000360
start_geohash_dqcjrb5       -0.000398 -0.001016
start_geohash_dqcjrbj       -0.000226 -0.000083
start_geohash_dqcjrbt       -0.000426 -0.000013
start_geohash_dqcjrbz       -0.000456  0.000383
start_geohash_dqcjrc8       -0.001242 -0.001184
start_geohash_dqcjrce       -0.001308  0.000185
start_geohash_dqcjrcn       -0.000763  0.000733
start_geohash_dqcjrct       -0.000821 -0.000033
start_geohash_dqcjrcv       -0.000030  0.000121
start_geohash_dqcjrcx       -0.000282 -0.000324
start_geohash_dqcjrd0       -0.000504  0.000215
start_geohash_dqcjrd1       -0.000302  0.000684
start_geohash_dqcjrdw        0.000064 -0.001065
start_geohash_dqcjre1        0.000351 -0.000059
start_geohash_dqcjreb       -0.000751  0.000357
start_geohash_dqcjreg       -0.000511 -0.000009
start_geohash_dqcjrew       -0.000500  0.001029
start_geohash_dqcjrf5       -0.000494  0.000053
start_geohash_dqcjrf8       -0.000741 -0.000329
start_geohash_dqcjrfb       -0.000359  0.000269
start_geohash_dqcjrfc       -0.001395 -0.000890
start_geohash_dqcjrff       -0.001097  0.000021
start_geohash_dqcjrfq       -0.000108 -0.000150
start_geohash_dqcjrfu       -0.000009 -0.000931
start_geohash_dqcjrg8       -0.000438  0.000268
start_geohash_dqcjrhc       -0.000672 -0.000188
start_geohash_dqcjrhg       -0.001562 -0.001047
start_geohash_dqcjrhn       -0.000726  0.000576
start_geohash_dqcjrhs       -0.000891  0.000024
start_geohash_dqcjrht       -0.000527 -0.000344
start_geohash_dqcjrhx       -0.000659 -0.000523
start_geohash_dqcjrhy       -0.000411 -0.000098
start_geohash_dqcjrj7       -0.001567 -0.000570
start_geohash_dqcjrjb       -0.000402  0.000326
start_geohash_dqcjrjp       -0.000441 -0.000421
start_geohash_dqcjrjw       -0.000435  0.000300
start_geohash_dqcjrk4       -0.001300  0.000029
start_geohash_dqcjrk6       -0.000692 -0.000369
start_geohash_dqcjrkc       -0.000853 -0.000339
start_geohash_dqcjrkw       -0.000259  0.000564
start_geohash_dqcjrmm       -0.000012  0.000430
start_geohash_dqcjrn4       -0.000455 -0.000858
start_geohash_dqcjrn6       -0.000433 -0.000207
start_geohash_dqcjrnd       -0.000979  0.000845
start_geohash_dqcjrnn       -0.000989 -0.001869
start_geohash_dqcjrny       -0.000684 -0.000387
start_geohash_dqcjrp5       -0.000182 -0.000411
start_geohash_dqcjrp6       -0.000370 -0.000116
start_geohash_dqcjrpf       -0.000646  0.000749
start_geohash_dqcjrpj       -0.000627 -0.000121
start_geohash_dqcjrq4       -0.000030 -0.000010
start_geohash_dqcjrq9       -0.000578  0.000222
start_geohash_dqcjrr3       -0.000132 -0.000449
start_geohash_dqcjrs3       -0.001041 -0.001204
start_geohash_dqcjrsh       -0.000216 -0.000594
start_geohash_dqcjrsp       -0.000255  0.000222
start_geohash_dqcjrsy       -0.000194 -0.000082
start_geohash_dqcjruc       -0.000225 -0.000948
start_geohash_dqcjrvb        0.000256  0.000264
start_geohash_dqcjrvm       -0.000075  0.000379
start_geohash_dqcjrvt       -0.000160  0.000589
start_geohash_dqcjrvx       -0.000051 -0.000451
start_geohash_dqcjrwc       -0.000052 -0.000438
start_geohash_dqcjrys       -0.000082 -0.000444
start_geohash_dqcjrzm       -0.000369 -0.000243
start_geohash_dqcjrzr       -0.000120  0.000147
start_geohash_dqcjrzv        0.000025  0.000080
start_geohash_dqcjt2h        0.000035  0.000011
start_geohash_dqcjtb4       -0.000205 -0.001365
start_geohash_dqcjtbd       -0.000093  0.000614
start_geohash_dqcjtfx       -0.000054 -0.000022
start_geohash_dqcjtmr       -0.000002 -0.000139
start_geohash_dqcjtu9        0.000065 -0.000093
start_geohash_dqcjtug        0.000062 -0.000007
start_geohash_dqcjtuh       -0.000003  0.000200
start_geohash_dqcjtxt        0.000136  0.000137
start_geohash_dqcjv7d        0.000330  0.000105
start_geohash_dqcjv87        0.001319  0.000104
start_geohash_dqcjv8n        0.000026 -0.000095
start_geohash_dqcjv8r        0.000141  0.000175
start_geohash_dqcjv8v        0.000949  0.000564
start_geohash_dqcjv9f        0.000300 -0.000137
start_geohash_dqcjv9s        0.000219 -0.000082
start_geohash_dqcjvd1        0.000156 -0.000246
start_geohash_dqcjvd3        0.000970  0.000250
start_geohash_dqcjvee        0.000129 -0.000129
start_geohash_dqcjw0e       -0.000076  0.000443
start_geohash_dqcjw14        0.000038 -0.000013
start_geohash_dqcjw16        0.000008  0.000486
start_geohash_dqcjw1b       -0.000006 -0.000046
start_geohash_dqcjw2p       -0.000199 -0.000289
start_geohash_dqcjw3e        0.000012 -0.000503
start_geohash_dqcjw3j       -0.000042 -0.000037
start_geohash_dqcjw40       -0.000042  0.000057
start_geohash_dqcjw5x        0.000135  0.000871
start_geohash_dqcjw64       -0.000047 -0.000283
start_geohash_dqcjwcn        0.000100 -0.000262
start_geohash_dqcjwj7        0.000026  0.000410
start_geohash_dqcjwm2        0.000025  0.000242
start_geohash_dqcjx06       -0.000056  0.000600
start_geohash_dqcjx0n        0.000223  0.000354
start_geohash_dqcjx16        0.000068 -0.000051
start_geohash_dqcjx20       -0.000411 -0.000652
start_geohash_dqcjx2k       -0.000134  0.000370
start_geohash_dqcjx30       -0.000058 -0.000165
start_geohash_dqcjx3h       -0.000055 -0.000239
start_geohash_dqcjx44        0.000112  0.000335
start_geohash_dqcjx4w        0.000052 -0.000078
start_geohash_dqcjx5f        0.000100 -0.000243
start_geohash_dqcjx5r        0.000030  0.000273
start_geohash_dqcjx5v        0.000028  0.000191
start_geohash_dqcjx66        0.000125  0.000119
start_geohash_dqcjx7g        0.000081  0.000079
start_geohash_dqcjxbr       -0.000240 -0.000131
start_geohash_dqcjxdb       -0.000051 -0.000213
start_geohash_dqcjxg0        0.000078 -0.001339
start_geohash_dqcjxgb       -0.000019  0.000020
start_geohash_dqcjxhw        0.000305 -0.000041
start_geohash_dqcjxjy        0.000103 -0.000173
start_geohash_dqcjxn1        0.000161  0.000116
start_geohash_dqcjxpq        0.000317  0.000323
start_geohash_dqcjxqn        0.000182  0.000174
start_geohash_dqcjxrn        0.001275  0.000213
start_geohash_dqcjxs0        0.000138 -0.000097
start_geohash_dqcjxut        0.000044 -0.000246
start_geohash_dqcjxx4        0.000220  0.000053
start_geohash_dqcjxxt        0.000142 -0.000055
start_geohash_dqcjxzj        0.000201  0.000126
start_geohash_dqcjyfy        0.000150  0.000071
start_geohash_dqcjz1h        0.000022  0.000005
start_geohash_dqcjz31        0.000166  0.000023
start_geohash_dqcjz4s        0.000142 -0.000096
start_geohash_dqcjz51        0.000196  0.000130
start_geohash_dqcjz5p        0.000292  0.000296
start_geohash_dqcjz6n        0.000223 -0.000073
start_geohash_dqcjz8v        0.000338 -0.000385
start_geohash_dqcjzc6        0.000683  0.000524
start_geohash_dqckbp7        0.000161  0.000195
start_geohash_dqckbqs        0.000104  0.000162
start_geohash_dqckbrk        0.000040  0.000024
start_geohash_dqcm01m        0.000069  0.000031
start_geohash_dqcm04q        0.000041  0.000268
start_geohash_dqcm05d       -0.000016  0.000161
start_geohash_dqcm05m       -0.000011 -0.000154
start_geohash_dqcm0d8        0.000063  0.000124
start_geohash_dqcm0ez        0.000125 -0.000196
start_geohash_dqcm0hp        0.000020 -0.000128
start_geohash_dqcm0hw        0.000075  0.000203
start_geohash_dqcm0ju       -0.000106  0.000492
start_geohash_dqcm0kx        0.000036 -0.000053
start_geohash_dqcm0mx        0.000445  0.000738
start_geohash_dqcm0n3       -0.000220  0.000138
start_geohash_dqcm0n7       -0.000005 -0.000236
start_geohash_dqcm0nb        0.000319 -0.000057
start_geohash_dqcm0ns        0.000364 -0.000138
start_geohash_dqcm0pf       -0.000441 -0.000343
start_geohash_dqcm0pn       -0.000255  0.000248
start_geohash_dqcm0pw       -0.000313 -0.000295
start_geohash_dqcm0q2       -0.000049  0.000258
start_geohash_dqcm0qf       -0.000161  0.000141
start_geohash_dqcm0re       -0.000207  0.000126
start_geohash_dqcm0xy        0.000026 -0.000041
start_geohash_dqcm1r8        0.000066  0.000315
start_geohash_dqcm1w8        0.000154 -0.000160
start_geohash_dqcm20e       -0.000195  0.000010
start_geohash_dqcm20q       -0.000135 -0.000088
start_geohash_dqcm21d       -0.000141  0.000575
start_geohash_dqcm21n       -0.000461 -0.000343
start_geohash_dqcm21w       -0.000198 -0.000294
start_geohash_dqcm22s       -0.000084 -0.000202
start_geohash_dqcm231       -0.000170 -0.000191
start_geohash_dqcm23c       -0.000326 -0.001030
start_geohash_dqcm245       -0.000018 -0.000108
start_geohash_dqcm24g       -0.000164 -0.000010
start_geohash_dqcm24w       -0.000121  0.000025
start_geohash_dqcm25s        0.000013 -0.000156
start_geohash_dqcm27y        0.000228  0.000723
start_geohash_dqcm28c        0.000020  0.000067
start_geohash_dqcm290       -0.000040  0.000100
start_geohash_dqcm2hq       -0.000056  0.000516
start_geohash_dqcm2ju       -0.000018  0.000047
start_geohash_dqcm2n8       -0.000009 -0.000796
start_geohash_dqcm2p8       -0.000052  0.000318
start_geohash_dqcm2r9       -0.000081 -0.000067
start_geohash_dqcm2rn        0.000126 -0.000161
start_geohash_dqcm2w5        0.000008 -0.000043
start_geohash_dqcm318       -0.000009  0.000046
start_geohash_dqcm375        0.000126  0.000121
start_geohash_dqcm5z9        0.000441  0.000312
start_geohash_dqcm737        0.000428  0.000024
start_geohash_dqcm813        0.000013 -0.000201
start_geohash_dqcm830        0.000040 -0.000224
start_geohash_dqcm839        0.000049 -0.000020
start_geohash_dqcm8be        0.000090 -0.000029
start_geohash_dqcm8e9        0.000435  0.000411
start_geohash_dqcm8er        0.000263 -0.000407
start_geohash_dqcm8q8        0.000174 -0.000020
start_geohash_dqcm8v7        0.000272 -0.000208
start_geohash_dqcm8y5        0.000163  0.000069
start_geohash_dqcm95t        0.000156 -0.000232
start_geohash_dqcm9h3        0.000153  0.000165
start_geohash_dqcm9kc        0.000500  0.000457
start_geohash_dqcm9np        0.000204 -0.000047
start_geohash_dqcmfeg        0.000404  0.000168
start_geohash_dqcnhuw        0.001065 -0.000245
start_geohash_dqcnj5x        0.000353 -0.000212
start_geohash_dqcnk9e        0.000494 -0.000018
start_geohash_dqcnke2        0.000480 -0.000036
start_geohash_dqcnkjn        0.001483 -0.000410
start_geohash_dqcnndr        0.000203 -0.000139
start_geohash_dqcnndt        0.001030 -0.000306
start_geohash_dqcnnsm        0.000890 -0.000336
start_geohash_dqcns19        0.000559  0.000132
start_geohash_dqcns48        0.000688  0.000061
start_geohash_dqcq35j        0.000460 -0.000229
end_geohash_dqbvxdr          0.001303  0.000007
end_geohash_dqbvxe9          0.001207  0.000036
end_geohash_dqbvxek          0.001318 -0.000042
end_geohash_dqbvxeu          0.001210 -0.000276
end_geohash_dqbvxfe          0.000814 -0.000097
end_geohash_dqbvxgd          0.002390 -0.000005
end_geohash_dqbvxsm          0.002447 -0.000081
end_geohash_dqbvxsw          0.001419  0.000170
end_geohash_dqbvxv0          0.002420 -0.000126
end_geohash_dqbvxv5          0.002415  0.000006
end_geohash_dqbvxyf          0.001202 -0.000079
end_geohash_dqchbuk          0.000767 -0.000093
end_geohash_dqchbv1          0.000797 -0.000110
end_geohash_dqchbyf          0.001454 -0.000524
end_geohash_dqchtzv          0.000221  0.000098
end_geohash_dqchub1          0.000314  0.000041
end_geohash_dqchv1g          0.000442 -0.000010
end_geohash_dqchv6t          0.000241 -0.000244
end_geohash_dqchv8b          0.000136 -0.000057
end_geohash_dqchvk9          0.000186  0.000138
end_geohash_dqchvqg          0.000416  0.000243
end_geohash_dqchvqj          0.000179 -0.000011
end_geohash_dqchvue          0.000221  0.000242
end_geohash_dqchvwn          0.000357 -0.000116
end_geohash_dqchvy9          0.000198  0.000094
end_geohash_dqchvz7          0.000138  0.000323
end_geohash_dqchwpv          0.001144  0.000037
end_geohash_dqchwr8          0.000521 -0.000125
end_geohash_dqchwrg          0.000445 -0.000104
end_geohash_dqchwwn          0.001075 -0.000129
end_geohash_dqchwwy          0.000465  0.000023
end_geohash_dqchwxr          0.000197 -0.000112
end_geohash_dqchwz3          0.001080  0.000420
end_geohash_dqchwzs          0.000774 -0.000106
end_geohash_dqchwzv          0.000498 -0.000242
end_geohash_dqchxkn          0.000280 -0.000142
end_geohash_dqchxky          0.001095 -0.000193
end_geohash_dqchxyb          0.001400  0.000187
end_geohash_dqchy2n          0.000227 -0.000140
end_geohash_dqchy2w          0.001180 -0.000215
end_geohash_dqchy6u          0.000172 -0.000060
end_geohash_dqchy83          0.000484  0.000072
end_geohash_dqchy87          0.000232  0.000239
end_geohash_dqchy8m          0.000707 -0.000374
end_geohash_dqchy9g          0.001374  0.000221
end_geohash_dqchy9t          0.000415 -0.000106
end_geohash_dqchy9y          0.000177  0.000080
end_geohash_dqchyb1          0.000099 -0.000103
end_geohash_dqchyb4          0.001408  0.000093
end_geohash_dqchyc3          0.000353  0.000145
end_geohash_dqchycd          0.000627 -0.000131
end_geohash_dqchycg          0.000905 -0.000473
end_geohash_dqchycs          0.000374  0.000140
end_geohash_dqchydb          0.000593 -0.000051
end_geohash_dqchydm          0.000480  0.000008
end_geohash_dqchye5          0.000630 -0.000118
end_geohash_dqchyeq          0.000300 -0.000135
end_geohash_dqchyer          0.000104  0.000043
end_geohash_dqchykp          0.000867 -0.000634
end_geohash_dqchymq          0.000341 -0.000461
end_geohash_dqchypz          0.000296  0.000258
end_geohash_dqchyqn          0.000063  0.000099
end_geohash_dqchyqu          0.000184  0.000451
end_geohash_dqchyr5          0.000393  0.000184
end_geohash_dqchysx          0.000092 -0.000023
end_geohash_dqchytn          0.000528 -0.000241
end_geohash_dqchytr          0.000924 -0.000296
end_geohash_dqchyw0          0.000259  0.000187
end_geohash_dqchyxj          0.000053 -0.000008
end_geohash_dqchyxm          0.000521 -0.000563
end_geohash_dqchyxs          0.000271  0.000226
end_geohash_dqchzvc          0.000082 -0.000017
end_geohash_dqchzz4          0.000094  0.000002
end_geohash_dqcj1eh          0.000590 -0.000045
end_geohash_dqcj1tm          0.002916 -0.000501
end_geohash_dqcj4ey          0.000482 -0.000249
end_geohash_dqcj4s4          0.000955 -0.000218
end_geohash_dqcj4sd          0.001464 -0.000023
end_geohash_dqcj4tb          0.000457 -0.000255
end_geohash_dqcj4td          0.000524 -0.000003
end_geohash_dqcj4w4          0.000500 -0.000121
end_geohash_dqcj4wk          0.001463  0.000103
end_geohash_dqcj5j3          0.000428 -0.000130
end_geohash_dqcj5mk          0.000360  0.000050
end_geohash_dqcj5vb          0.000613  0.000033
end_geohash_dqcj5xr          0.000296 -0.000037
end_geohash_dqcj5yg          0.000835  0.000028
end_geohash_dqcj5ys          0.000739 -0.000085
end_geohash_dqcj6f2          0.000896 -0.000287
end_geohash_dqcj6wt          0.000842 -0.000364
end_geohash_dqcj6y9          0.000377 -0.000203
end_geohash_dqcj6yk          0.000488 -0.000206
end_geohash_dqcj6z6          0.000486  0.000037
end_geohash_dqcj798          0.000391 -0.000066
end_geohash_dqcj7jf          0.000932  0.000219
end_geohash_dqcj7nj          0.000536 -0.000021
end_geohash_dqcj80h          0.002124  0.000866
end_geohash_dqcj845          0.003082 -0.000068
end_geohash_dqcj84f          0.004538  0.000089
end_geohash_dqcj861          0.001930  0.000152
end_geohash_dqcj8n7          0.001188 -0.000194
end_geohash_dqcjb09          0.001386 -0.000052
end_geohash_dqcjhkd          0.000241  0.000119
end_geohash_dqcjhmd          0.000515  0.000145
end_geohash_dqcjhph          0.000526 -0.000229
end_geohash_dqcjhqb          0.001775 -0.000557
end_geohash_dqcjhvj          0.000243  0.000286
end_geohash_dqcjhwf          0.000214  0.000148
end_geohash_dqcjhwp          0.000131  0.000189
end_geohash_dqcjhzm          0.000048  0.000075
end_geohash_dqcjj1h          0.000117 -0.000084
end_geohash_dqcjj2v          0.000149 -0.000158
end_geohash_dqcjj39          0.001015 -0.000445
end_geohash_dqcjj3d          0.000134 -0.000030
end_geohash_dqcjj6p          0.000531 -0.000064
end_geohash_dqcjj82          0.000277 -0.000062
end_geohash_dqcjj8j          0.000274 -0.000283
end_geohash_dqcjjdq          0.000107 -0.000134
end_geohash_dqcjjeu          0.000316 -0.000200
end_geohash_dqcjjf9          0.000337 -0.000443
end_geohash_dqcjjfe          0.000461  0.001606
end_geohash_dqcjjfz          0.000139  0.000078
end_geohash_dqcjjgn          0.000140  0.000276
end_geohash_dqcjjjd          0.000161  0.000018
end_geohash_dqcjjjz          0.000716  0.000394
end_geohash_dqcjjm5          0.000309 -0.000098
end_geohash_dqcjjmg          0.000978 -0.000574
end_geohash_dqcjjnt          0.000257 -0.000065
end_geohash_dqcjjq0          0.000854 -0.000490
end_geohash_dqcjjq2          0.000190  0.000035
end_geohash_dqcjjq6          0.000136  0.000085
end_geohash_dqcjjqe          0.000449  0.000086
end_geohash_dqcjjqr          0.000618  0.000568
end_geohash_dqcjjr3          0.000769  0.000309
end_geohash_dqcjjuw          0.000129 -0.000036
end_geohash_dqcjjw9          0.000487 -0.000236
end_geohash_dqcjjwg          0.000427 -0.000094
end_geohash_dqcjjwy          0.000148  0.000154
end_geohash_dqcjjxh          0.000650 -0.000179
end_geohash_dqcjjxq          0.000299 -0.000019
end_geohash_dqcjjxx          0.000287 -0.000420
end_geohash_dqcjjy0          0.000241 -0.000464
end_geohash_dqcjjy5          0.000389 -0.000420
end_geohash_dqcjjyf          0.000089 -0.000251
end_geohash_dqcjjzc          0.000175 -0.000352
end_geohash_dqcjjzh          0.000241  0.000225
end_geohash_dqcjjzu          0.000250 -0.000328
end_geohash_dqcjjzz          0.000059 -0.000326
end_geohash_dqcjkfm          0.000285  0.000014
end_geohash_dqcjm1h          0.000459  0.000012
end_geohash_dqcjm2y          0.000187 -0.000457
end_geohash_dqcjm8x          0.000089 -0.000149
end_geohash_dqcjmb7          0.000034  0.000097
end_geohash_dqcjmbc          0.000198 -0.000241
end_geohash_dqcjmbg          0.000059 -0.000232
end_geohash_dqcjmbh          0.000131 -0.000498
end_geohash_dqcjmbn          0.000072  0.000064
end_geohash_dqcjmcp          0.000014 -0.000279
end_geohash_dqcjmgk         -0.000015  0.000080
end_geohash_dqcjmqq          0.000120 -0.000094
end_geohash_dqcjmss          0.000109  0.000025
end_geohash_dqcjmu8          0.000057 -0.000137
end_geohash_dqcjmze          0.000063  0.000233
end_geohash_dqcjmzj          0.000033  0.000246
end_geohash_dqcjn05          0.000189  0.000150
end_geohash_dqcjn2y          0.000046 -0.000249
end_geohash_dqcjn3z          0.000230 -0.000015
end_geohash_dqcjn4f          0.000182  0.000215
end_geohash_dqcjn56          0.000344  0.000802
end_geohash_dqcjn5t          0.000215  0.000129
end_geohash_dqcjn6h          0.000280 -0.000327
end_geohash_dqcjn6u          0.000621  0.000101
end_geohash_dqcjn6w          0.000551  0.000177
end_geohash_dqcjn8j          0.000125  0.000089
end_geohash_dqcjn8s          0.000200  0.000169
end_geohash_dqcjn8y          0.000220 -0.000328
end_geohash_dqcjn97          0.000092  0.000076
end_geohash_dqcjn9u          0.000243 -0.000079
end_geohash_dqcjn9w          0.000457 -0.000534
end_geohash_dqcjnbb          0.000419  0.000078
end_geohash_dqcjnd5          0.000296 -0.000218
end_geohash_dqcjndf          0.000423  0.000377
end_geohash_dqcjndq          0.000268  0.000094
end_geohash_dqcjne0          0.000129  0.000079
end_geohash_dqcjnem          0.000002  0.000047
end_geohash_dqcjngj          0.001287  0.000322
end_geohash_dqcjnp7          0.000050  0.000021
end_geohash_dqcjnpg          0.000180  0.000179
end_geohash_dqcjnpw         -0.000019 -0.000291
end_geohash_dqcjnqd          0.000099  0.000251
end_geohash_dqcjnu4          0.000237  0.000185
end_geohash_dqcjnvz         -0.000300  0.000732
end_geohash_dqcjnxq         -0.000789  0.001236
end_geohash_dqcjnxy         -0.000888  0.000611
end_geohash_dqcjny8          0.000315  0.000979
end_geohash_dqcjnzq         -0.000302  0.000517
end_geohash_dqcjp9n          0.000020  0.000063
end_geohash_dqcjpdb         -0.000050 -0.000176
end_geohash_dqcjpe6         -0.000160 -0.000516
end_geohash_dqcjpfw          0.000012  0.000175
end_geohash_dqcjpjs         -0.000191  0.001723
end_geohash_dqcjpjz         -0.000363  0.000544
end_geohash_dqcjpkf         -0.000040 -0.000316
end_geohash_dqcjpkt         -0.000232  0.000291
end_geohash_dqcjpm7         -0.000317  0.000315
end_geohash_dqcjpm9         -0.000924  0.001379
end_geohash_dqcjpmm         -0.000638 -0.000329
end_geohash_dqcjpnx         -0.000126  0.000474
end_geohash_dqcjpp6         -0.001157  0.001258
end_geohash_dqcjppj         -0.000139  0.000266
end_geohash_dqcjppp         -0.000179 -0.000080
end_geohash_dqcjppt         -0.000444  0.000464
end_geohash_dqcjpqd         -0.000282  0.000273
end_geohash_dqcjpqj         -0.000846 -0.000323
end_geohash_dqcjpqt         -0.000137  0.000057
end_geohash_dqcjpr4         -0.000294 -0.000107
end_geohash_dqcjprj         -0.000165  0.000480
end_geohash_dqcjprv         -0.000796  0.001191
end_geohash_dqcjps1         -0.000124 -0.000082
end_geohash_dqcjps4         -0.000104 -0.000199
end_geohash_dqcjpsm         -0.000137 -0.000385
end_geohash_dqcjpt1         -0.000096  0.000120
end_geohash_dqcjptf         -0.000034 -0.000181
end_geohash_dqcjptn         -0.000170  0.000345
end_geohash_dqcjptp         -0.000250  0.000621
end_geohash_dqcjptq         -0.000723  0.000400
end_geohash_dqcjptt         -0.000690 -0.001092
end_geohash_dqcjpty         -0.000623 -0.000866
end_geohash_dqcjpub         -0.000357 -0.000319
end_geohash_dqcjpv2         -0.000283 -0.000100
end_geohash_dqcjpv3         -0.000557 -0.000516
end_geohash_dqcjpvq         -0.000026 -0.000102
end_geohash_dqcjpvv         -0.000085  0.000065
end_geohash_dqcjpwv         -0.000646  0.000211
end_geohash_dqcjpx0         -0.000329  0.000332
end_geohash_dqcjpy1         -0.000706 -0.000415
end_geohash_dqcjpyb         -0.000416 -0.000403
end_geohash_dqcjpyt         -0.000982 -0.000535
end_geohash_dqcjpyw         -0.000056  0.000065
end_geohash_dqcjpz0         -0.000927  0.000591
end_geohash_dqcjpzd         -0.000442  0.000296
end_geohash_dqcjpzh         -0.000852  0.000196
end_geohash_dqcjpzt         -0.000345 -0.000020
end_geohash_dqcjq00          0.000108  0.000450
end_geohash_dqcjq03          0.000196 -0.000379
end_geohash_dqcjq06          0.000059 -0.000104
end_geohash_dqcjq0q         -0.000027  0.000491
end_geohash_dqcjq0s          0.000102 -0.000309
end_geohash_dqcjq0t          0.000167  0.000178
end_geohash_dqcjq0y         -0.000032  0.000142
end_geohash_dqcjq13         -0.000007  0.000035
end_geohash_dqcjq1j          0.000172 -0.000025
end_geohash_dqcjq2c          0.000479  0.000313
end_geohash_dqcjq3z         -0.000428  0.000467
end_geohash_dqcjq4y         -0.000761 -0.000956
end_geohash_dqcjq5u         -0.000199 -0.001143
end_geohash_dqcjq61         -0.000125  0.001478
end_geohash_dqcjq6d         -0.000631  0.001106
end_geohash_dqcjq6h         -0.000372  0.001154
end_geohash_dqcjq6n         -0.000625  0.001279
end_geohash_dqcjq7f         -0.000321  0.000869
end_geohash_dqcjq7h         -0.000469  0.000143
end_geohash_dqcjq8n         -0.000328  0.000230
end_geohash_dqcjq8y         -0.000546  0.000735
end_geohash_dqcjq91         -0.000168  0.000075
end_geohash_dqcjq95         -0.000393  0.000751
end_geohash_dqcjq9q         -0.000372  0.000028
end_geohash_dqcjq9v         -0.001460 -0.000848
end_geohash_dqcjq9w         -0.000380 -0.000196
end_geohash_dqcjqb4         -0.000097  0.000209
end_geohash_dqcjqb6         -0.000070  0.000100
end_geohash_dqcjqbb         -0.000313  0.000192
end_geohash_dqcjqbc         -0.000317 -0.000788
end_geohash_dqcjqbd         -0.000115 -0.000244
end_geohash_dqcjqbk         -0.000131 -0.000052
end_geohash_dqcjqbv         -0.000274  0.000955
end_geohash_dqcjqc0         -0.000433  0.000051
end_geohash_dqcjqc4         -0.000417 -0.000101
end_geohash_dqcjqc8         -0.000600 -0.000866
end_geohash_dqcjqcb         -0.000358 -0.000262
end_geohash_dqcjqce         -0.000112 -0.000235
end_geohash_dqcjqcf         -0.000587 -0.000406
end_geohash_dqcjqcj         -0.000420  0.000498
end_geohash_dqcjqck         -0.000568  0.000387
end_geohash_dqcjqcy         -0.000548  0.000313
end_geohash_dqcjqd2         -0.000696  0.001563
end_geohash_dqcjqd5         -0.001151  0.000104
end_geohash_dqcjqdm         -0.001259  0.000988
end_geohash_dqcjqdt         -0.000645  0.000609
end_geohash_dqcjqdx         -0.001324 -0.001553
end_geohash_dqcjqen         -0.000740 -0.000107
end_geohash_dqcjqf1         -0.000699 -0.000638
end_geohash_dqcjqf5         -0.000370  0.001129
end_geohash_dqcjqfk         -0.000281 -0.000078
end_geohash_dqcjqfn         -0.000697 -0.000847
end_geohash_dqcjqfp         -0.000133  0.000357
end_geohash_dqcjqft         -0.000648 -0.001105
end_geohash_dqcjqfy         -0.000638 -0.000354
end_geohash_dqcjqg1         -0.000806 -0.000041
end_geohash_dqcjqg6         -0.002288  0.000858
end_geohash_dqcjqgc         -0.001186  0.000142
end_geohash_dqcjqge         -0.001331 -0.000944
end_geohash_dqcjqgg         -0.001208 -0.000107
end_geohash_dqcjqgn         -0.000676 -0.000503
end_geohash_dqcjqgq         -0.001203  0.000147
end_geohash_dqcjqgw         -0.001648  0.000685
end_geohash_dqcjqgz         -0.000666  0.000019
end_geohash_dqcjqjd         -0.000114  0.000664
end_geohash_dqcjqjx         -0.000124  0.000953
end_geohash_dqcjqk8         -0.000264  0.000659
end_geohash_dqcjqn0         -0.000049 -0.000440
end_geohash_dqcjqp4          0.000006  0.000159
end_geohash_dqcjqpv         -0.000018 -0.000563
end_geohash_dqcjqtu         -0.000420 -0.000453
end_geohash_dqcjqtv         -0.000751  0.000016
end_geohash_dqcjqtx         -0.000291  0.000097
end_geohash_dqcjqu3         -0.000505 -0.000598
end_geohash_dqcjquc         -0.000727 -0.000334
end_geohash_dqcjquq         -0.002649 -0.000359
end_geohash_dqcjquu         -0.002138 -0.001350
end_geohash_dqcjqv6         -0.000409 -0.000263
end_geohash_dqcjqve         -0.001169 -0.001235
end_geohash_dqcjqvu         -0.000770  0.000856
end_geohash_dqcjqx1         -0.000193  0.000411
end_geohash_dqcjqx8         -0.000053 -0.000485
end_geohash_dqcjqyd         -0.000164  0.000431
end_geohash_dqcjqyr         -0.000481 -0.000598
end_geohash_dqcjqyz         -0.000584  0.000048
end_geohash_dqcjqzq         -0.000313  0.000177
end_geohash_dqcjqzu          0.000028  0.000028
end_geohash_dqcjr04         -0.000572  0.000512
end_geohash_dqcjr0e         -0.000347  0.000021
end_geohash_dqcjr0f         -0.000181  0.000405
end_geohash_dqcjr0r         -0.000226  0.000064
end_geohash_dqcjr0z         -0.000351 -0.000235
end_geohash_dqcjr15         -0.000294  0.000428
end_geohash_dqcjr16         -0.000734 -0.000465
end_geohash_dqcjr17         -0.000552  0.000435
end_geohash_dqcjr19         -0.000839  0.000074
end_geohash_dqcjr1c         -0.000222  0.000847
end_geohash_dqcjr1n         -0.001153 -0.000808
end_geohash_dqcjr1p         -0.000372  0.000721
end_geohash_dqcjr1s         -0.000334  0.000228
end_geohash_dqcjr1t         -0.000651  0.000175
end_geohash_dqcjr1z         -0.000565 -0.000355
end_geohash_dqcjr22         -0.000362  0.001348
end_geohash_dqcjr29         -0.000535 -0.000475
end_geohash_dqcjr2e         -0.000452  0.000290
end_geohash_dqcjr2w         -0.000215  0.000369
end_geohash_dqcjr30         -0.000881  0.000537
end_geohash_dqcjr33         -0.000607 -0.000679
end_geohash_dqcjr34         -0.000745  0.000067
end_geohash_dqcjr3h         -0.000641 -0.000188
end_geohash_dqcjr3r         -0.000555 -0.000672
end_geohash_dqcjr3s         -0.000795 -0.000288
end_geohash_dqcjr40         -0.000341 -0.000267
end_geohash_dqcjr45         -0.000469  0.000511
end_geohash_dqcjr49         -0.000754 -0.000827
end_geohash_dqcjr4d         -0.001483  0.001687
end_geohash_dqcjr4j         -0.000286 -0.000326
end_geohash_dqcjr4w         -0.000974 -0.001421
end_geohash_dqcjr53         -0.002736  0.000952
end_geohash_dqcjr54         -0.000992 -0.000829
end_geohash_dqcjr5e         -0.001017 -0.001281
end_geohash_dqcjr5g         -0.001414 -0.001044
end_geohash_dqcjr5h         -0.000785 -0.000130
end_geohash_dqcjr5n         -0.000593 -0.000627
end_geohash_dqcjr64         -0.001193 -0.000467
end_geohash_dqcjr6b         -0.000423 -0.000144
end_geohash_dqcjr6d         -0.001093  0.000200
end_geohash_dqcjr6h         -0.001816 -0.000226
end_geohash_dqcjr6z         -0.000532 -0.000152
end_geohash_dqcjr71         -0.001706  0.001352
end_geohash_dqcjr7c         -0.000254  0.000539
end_geohash_dqcjr7v         -0.000596  0.000479
end_geohash_dqcjr80         -0.000412  0.001800
end_geohash_dqcjr8x         -0.001263 -0.000605
end_geohash_dqcjr91         -0.001153 -0.000603
end_geohash_dqcjr95         -0.000461 -0.000188
end_geohash_dqcjr97         -0.000251  0.000160
end_geohash_dqcjr99         -0.000891  0.001315
end_geohash_dqcjr9n         -0.003079 -0.001789
end_geohash_dqcjr9r         -0.000738 -0.000099
end_geohash_dqcjr9u         -0.000180  0.000151
end_geohash_dqcjr9y         -0.001092 -0.000078
end_geohash_dqcjrb5         -0.000447 -0.000122
end_geohash_dqcjrbj         -0.000117  0.000080
end_geohash_dqcjrbt         -0.000585  0.000472
end_geohash_dqcjrbz         -0.000596  0.000134
end_geohash_dqcjrc8         -0.000802 -0.001228
end_geohash_dqcjrce         -0.001272 -0.001012
end_geohash_dqcjrcn         -0.000648 -0.000098
end_geohash_dqcjrct         -0.000609 -0.000654
end_geohash_dqcjrcv         -0.000119 -0.000052
end_geohash_dqcjrcx         -0.000279 -0.000172
end_geohash_dqcjrd0         -0.000455  0.000208
end_geohash_dqcjrd1         -0.000489 -0.000811
end_geohash_dqcjrdw         -0.001746 -0.000059
end_geohash_dqcjre1         -0.000558 -0.000006
end_geohash_dqcjreb         -0.000617 -0.000323
end_geohash_dqcjreg         -0.000171 -0.000372
end_geohash_dqcjrew         -0.000396  0.000175
end_geohash_dqcjrf5         -0.000536 -0.000150
end_geohash_dqcjrf8         -0.001076 -0.001172
end_geohash_dqcjrfb         -0.000473  0.000270
end_geohash_dqcjrfc         -0.000773 -0.001387
end_geohash_dqcjrff         -0.000581 -0.000030
end_geohash_dqcjrfj         -0.000200  0.000089
end_geohash_dqcjrfq         -0.000021  0.000068
end_geohash_dqcjrfu         -0.000212 -0.000374
end_geohash_dqcjrg8         -0.000336  0.000192
end_geohash_dqcjrhc         -0.001100 -0.000678
end_geohash_dqcjrhg         -0.001800 -0.000490
end_geohash_dqcjrhn         -0.000824 -0.000652
end_geohash_dqcjrhs         -0.000467 -0.000201
end_geohash_dqcjrht         -0.000215  0.000265
end_geohash_dqcjrhx         -0.000370 -0.000048
end_geohash_dqcjrhy         -0.000476 -0.000372
end_geohash_dqcjrj7         -0.000810  0.000121
end_geohash_dqcjrjb         -0.000663 -0.000225
end_geohash_dqcjrjp         -0.000828  0.000577
end_geohash_dqcjrjw         -0.000261 -0.000235
end_geohash_dqcjrk4         -0.000433 -0.000840
end_geohash_dqcjrk6         -0.000249 -0.000431
end_geohash_dqcjrkc         -0.000585 -0.001538
end_geohash_dqcjrkw         -0.000472 -0.000751
end_geohash_dqcjrmm         -0.000051  0.000131
end_geohash_dqcjrn4         -0.000422 -0.000363
end_geohash_dqcjrn6         -0.000119 -0.000188
end_geohash_dqcjrnd         -0.000849  0.000420
end_geohash_dqcjrnn         -0.000279 -0.000701
end_geohash_dqcjrny         -0.000213 -0.000026
end_geohash_dqcjrp5         -0.000324  0.000764
end_geohash_dqcjrp6         -0.000234  0.000422
end_geohash_dqcjrpf         -0.000234 -0.000219
end_geohash_dqcjrpj         -0.000120  0.000817
end_geohash_dqcjrq4         -0.000076 -0.000353
end_geohash_dqcjrq9         -0.000180  0.001253
end_geohash_dqcjrr3         -0.000053  0.000461
end_geohash_dqcjrs3         -0.000436  0.000044
end_geohash_dqcjrsh         -0.000270  0.000138
end_geohash_dqcjrsp         -0.000153  0.000525
end_geohash_dqcjrsy         -0.000095 -0.000367
end_geohash_dqcjruc         -0.000038 -0.000434
end_geohash_dqcjrvb         -0.000040 -0.000220
end_geohash_dqcjrvm         -0.000019 -0.000061
end_geohash_dqcjrvt          0.000002 -0.000356
end_geohash_dqcjrvx          0.000004  0.000138
end_geohash_dqcjrwc          0.000047 -0.000281
end_geohash_dqcjrys          0.000100  0.000846
end_geohash_dqcjrzm          0.000146  0.000228
end_geohash_dqcjrzr          0.000135 -0.000518
end_geohash_dqcjrzv          0.000038  0.000396
end_geohash_dqcjt2h          0.000039 -0.000162
end_geohash_dqcjtb4          0.000036 -0.000440
end_geohash_dqcjtbd          0.000031  0.000617
end_geohash_dqcjtfx          0.000013  0.000094
end_geohash_dqcjtgq          0.000014 -0.000129
end_geohash_dqcjtmr          0.000752 -0.000299
end_geohash_dqcjtu9          0.000291  0.000171
end_geohash_dqcjtug          0.000179  0.000202
end_geohash_dqcjtuh          0.000223  0.000340
end_geohash_dqcjv7d          0.000328  0.000134
end_geohash_dqcjv87          0.000944  0.000328
end_geohash_dqcjv8n          0.000272 -0.000071
end_geohash_dqcjv8r          0.000125  0.000143
end_geohash_dqcjv8v          0.000656 -0.000366
end_geohash_dqcjv8z          0.000149 -0.000148
end_geohash_dqcjv9f          0.000514  0.000402
end_geohash_dqcjvd1          0.000165 -0.000019
end_geohash_dqcjvd3          0.000271 -0.000062
end_geohash_dqcjvee          0.000149  0.000087
end_geohash_dqcjw0e          0.000037 -0.000302
end_geohash_dqcjw14          0.000043  0.000257
end_geohash_dqcjw16          0.000036  0.000233
end_geohash_dqcjw1b          0.000029  0.000049
end_geohash_dqcjw2p         -0.000077  0.000542
end_geohash_dqcjw3e          0.000037 -0.000017
end_geohash_dqcjw3j          0.000059  0.000227
end_geohash_dqcjw40          0.000145  0.000120
end_geohash_dqcjw5x          0.000174  0.000543
end_geohash_dqcjw64          0.000043  0.000475
end_geohash_dqcjwcn          0.000072 -0.000295
end_geohash_dqcjwj7          0.000110 -0.000373
end_geohash_dqcjwm2          0.000048 -0.000088
end_geohash_dqcjwr3          0.000032 -0.000073
end_geohash_dqcjx06          0.000004  0.000356
end_geohash_dqcjx0n         -0.000049  0.000839
end_geohash_dqcjx16          0.000043  0.000301
end_geohash_dqcjx20          0.000043 -0.000665
end_geohash_dqcjx2k          0.000003 -0.000128
end_geohash_dqcjx30          0.000142 -0.000476
end_geohash_dqcjx3h          0.000062  0.000363
end_geohash_dqcjx44          0.000067  0.000743
end_geohash_dqcjx4w          0.000038  0.000308
end_geohash_dqcjx5f          0.000202  0.000156
end_geohash_dqcjx5r          0.000037 -0.000043
end_geohash_dqcjx5v          0.000085  0.000358
end_geohash_dqcjx66          0.000106  0.000375
end_geohash_dqcjx7g          0.000285  0.000324
end_geohash_dqcjxbr          0.000085 -0.000270
end_geohash_dqcjxdb          0.000152 -0.000597
end_geohash_dqcjxg0          0.000218 -0.000454
end_geohash_dqcjxgb          0.000016 -0.000383
end_geohash_dqcjxhw          0.000108  0.000224
end_geohash_dqcjxjy          0.000293 -0.000586
end_geohash_dqcjxn1          0.000229  0.000177
end_geohash_dqcjxpq          0.000471 -0.000248
end_geohash_dqcjxqn          0.000315 -0.000066
end_geohash_dqcjxrn          0.001218 -0.000544
end_geohash_dqcjxs0          0.000128 -0.000138
end_geohash_dqcjxtv          0.000069 -0.000017
end_geohash_dqcjxut          0.000042 -0.000309
end_geohash_dqcjxx4          0.000226  0.000230
end_geohash_dqcjxxt          0.000429 -0.000013
end_geohash_dqcjxzj          0.000161  0.000154
end_geohash_dqcjy4d          0.000225 -0.000123
end_geohash_dqcjz1v          0.000214 -0.000268
end_geohash_dqcjz31          0.000192 -0.000142
end_geohash_dqcjz3b          0.000081  0.000094
end_geohash_dqcjz4v          0.000413  0.000025
end_geohash_dqcjz51          0.000196  0.000130
end_geohash_dqcjz5p          0.000081 -0.000039
end_geohash_dqcjz6n          0.000154  0.000258
end_geohash_dqcjz8v          0.000330  0.000078
end_geohash_dqcjzc6          0.000199  0.000405
end_geohash_dqckbjy          0.000104  0.000162
end_geohash_dqckbp7          0.000145  0.000363
end_geohash_dqcm01m          0.000057  0.000108
end_geohash_dqcm04c         -0.000020  0.000039
end_geohash_dqcm04q          0.000041  0.000268
end_geohash_dqcm073          0.000040  0.000102
end_geohash_dqcm0d8         -0.000003 -0.000037
end_geohash_dqcm0ez          0.000085 -0.000134
end_geohash_dqcm0hp          0.000020 -0.000128
end_geohash_dqcm0hw          0.000075  0.000203
end_geohash_dqcm0ju         -0.000046 -0.000226
end_geohash_dqcm0kx          0.000104  0.000020
end_geohash_dqcm0mx          0.000229  0.000343
end_geohash_dqcm0n3         -0.000089  0.000312
end_geohash_dqcm0n7          0.000061  0.000002
end_geohash_dqcm0nb         -0.000117 -0.000083
end_geohash_dqcm0ns         -0.000149  0.000561
end_geohash_dqcm0pf         -0.000167 -0.000127
end_geohash_dqcm0pn         -0.000040  0.000274
end_geohash_dqcm0pw         -0.000072  0.000234
end_geohash_dqcm0q2          0.000014  0.000192
end_geohash_dqcm0qf          0.000010  0.000430
end_geohash_dqcm0re          0.000007 -0.000074
end_geohash_dqcm0xy          0.000005 -0.000057
end_geohash_dqcm1r8          0.000031 -0.000123
end_geohash_dqcm1z3          0.000154 -0.000160
end_geohash_dqcm20e         -0.000217  0.000005
end_geohash_dqcm20q         -0.000085  0.000064
end_geohash_dqcm21d         -0.000402 -0.000666
end_geohash_dqcm21n         -0.000085 -0.000147
end_geohash_dqcm21w         -0.000034  0.000267
end_geohash_dqcm22s         -0.000082 -0.000027
end_geohash_dqcm231          0.000032 -0.000583
end_geohash_dqcm23c         -0.000066  0.000107
end_geohash_dqcm245         -0.000160 -0.000246
end_geohash_dqcm24g         -0.000149 -0.000218
end_geohash_dqcm24w         -0.000026 -0.000084
end_geohash_dqcm25s         -0.000051  0.000492
end_geohash_dqcm27y          0.000102  0.000033
end_geohash_dqcm28c          0.000024  0.000310
end_geohash_dqcm28z          0.000117 -0.000068
end_geohash_dqcm290          0.000057 -0.000051
end_geohash_dqcm2hq          0.000123 -0.001087
end_geohash_dqcm2ju          0.000002  0.000142
end_geohash_dqcm2n8         -0.000031 -0.000205
end_geohash_dqcm2p8          0.000008 -0.000272
end_geohash_dqcm2v6          0.000026  0.000245
end_geohash_dqcm2w5          0.000018 -0.000168
end_geohash_dqcm302          0.000095  0.000294
end_geohash_dqcm337          0.000126  0.000121
end_geohash_dqcm5z9          0.000428  0.000024
end_geohash_dqcm737          0.000441  0.000312
end_geohash_dqcm813          0.000029 -0.000104
end_geohash_dqcm839          0.000049 -0.000020
end_geohash_dqcm88p          0.000109  0.000112
end_geohash_dqcm8be          0.000175  0.000328
end_geohash_dqcm8e9          0.000381 -0.000437
end_geohash_dqcm8er          0.000096  0.000088
end_geohash_dqcm8v7          0.000457  0.000003
end_geohash_dqcm95t          0.000114  0.000024
end_geohash_dqcm9h3          0.000272 -0.000208
end_geohash_dqcm9kc          0.000503  0.000234
end_geohash_dqcm9np          0.000163  0.000069
end_geohash_dqcm9ry          0.000204 -0.000047
end_geohash_dqcmb1d          0.000113  0.000025
end_geohash_dqcmfeg          0.000404  0.000168
end_geohash_dqcn7y3          0.001089 -0.000145
end_geohash_dqcnj5x          0.000150  0.000155
end_geohash_dqcnjhy          0.000353 -0.000212
end_geohash_dqcnjk2          0.001065 -0.000245
end_geohash_dqcnk7p          0.000478  0.000072
end_geohash_dqcnk9e          0.000494 -0.000018
end_geohash_dqcnke2          0.000476 -0.000205
end_geohash_dqcnksb          0.000480 -0.000036
end_geohash_dqcnn9r          0.000499 -0.000446
end_geohash_dqcnndt          0.000433 -0.000160
end_geohash_dqcnnee          0.000574 -0.000046
end_geohash_dqcnnez          0.000258  0.000080
end_geohash_dqcnnsm          0.000307 -0.000066
end_geohash_dqcns1f          0.000688  0.000061
end_geohash_dqcq349          0.000460 -0.000229
start_capacity_bin_Medium   -0.048515  0.006878
start_capacity_bin_Small     0.077625 -0.003495
In [ ]:
dbscan = DBSCAN(eps=1.5, min_samples=3)
cluster_labels = dbscan.fit_predict(X_processed)
algo_name = "DBSCAN"


grouped_sample['cluster_label'] = cluster_labels
grouped_sample['cluster_label'] = grouped_sample['cluster_label'].astype(str) # لتحويل -1 في DBSCAN إلى '-1' نصية
unique_labels = set(cluster_labels)
clusters_for_silhouette = [label for label in unique_labels if label != -1]

if len(clusters_for_silhouette) > 1:
    valid_indices = (cluster_labels != -1)
    sil_score = silhouette_score(X_processed[valid_indices], cluster_labels[valid_indices])
    print(f"{algo_name} - Silhouette Score (excluding outliers) = {sil_score:.4f}")
else:
    print(f"{algo_name} - عدد العناقيد غير كافٍ أو معظمها outliers، لا يمكن حساب Silhouette Score.")



print(f"\n--- Cluster Description for {algo_name} ---")

print("\nAverage Values of Numerical Features per Cluster:")
numerical_summary = grouped_sample.groupby('cluster_label')[numeric_features].mean()
print(numerical_summary)

print("\nDistribution of Categorical Features per Cluster:")
for feature in categorical_features:
    print(f"\nFeature: {feature}")
    cluster_counts = grouped_sample.groupby(['cluster_label', feature]).size().unstack(fill_value=0)
    cluster_percentages = cluster_counts.apply(lambda x: x / x.sum(), axis=1) * 100
    print(cluster_percentages.round(2)) 

print("\nCluster Sizes:")
print(grouped_sample['cluster_label'].value_counts().sort_index())

pca = PCA(n_components=2)
components = pca.fit_transform(X_processed)

pca_df = pd.DataFrame(data = components, columns = ['PC1', 'PC2'])
pca_df['cluster_label'] = grouped_sample['cluster_label']

fig = px.scatter(
    pca_df,
    x='PC1',
    y='PC2',
    color='cluster_label',
    title=f'{algo_name} Clustering (PCA Reduced Dimensions)',
    labels={'PC1': 'Principal Component 1', 'PC2': 'Principal Component 2'}
)

# fig.add_trace(go.Scatter(
#     x=centroids_pca[:, 0], y=centroids_pca[:, 1],
#     mode='markers',
#     marker=dict(size=15, symbol='x', color='black'),
#     name='Centroids'
# ))

#fig.show()
image_path = r"C:\Users\ASUS\OneDrive\Desktop\images\Screenshot 2025-06-20 185455.png"
image = Image.open(image_path)
display(image)

processed_feature_names = numeric_features[:]
preprocessor.fit(grouped_sample) 
ohe_feature_names = preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)
processed_feature_names.extend(ohe_feature_names)

loadings = pd.DataFrame(pca.components_.T, columns=['PC1', 'PC2'], index=processed_feature_names)
print("\nPCA Loadings (Contribution of Original Features to Principal Components):")
print(loadings.to_string())
DBSCAN - Silhouette Score (excluding outliers) = -0.1635

--- Cluster Description for DBSCAN ---

Average Values of Numerical Features per Cluster:
                    temp   humidity  windspeed  ride_duration_min  \
cluster_label                                                       
-1             16.305688  63.226946  23.412787          21.301977   
0              11.779218  66.864425  17.979218          10.813570   
1               5.182609  67.207246  12.137681           9.892754   
10              5.040000  67.925000  11.975000           9.400000   
100            10.166667  40.266667  28.966667           4.200000   
...                  ...        ...        ...                ...   
95             25.233333  53.600000  22.966667          10.900000   
96             22.133333  52.600000  27.233333           9.400000   
97             25.700000  58.233333  22.366667           5.166667   
98             14.880000  49.560000  26.240000           9.180000   
99             24.500000  55.633333  20.933333           9.600000   

               cost_category  distance_to_business_area  \
cluster_label                                             
-1                  2.302541                   0.030351   
0                   3.000000                   0.008439   
1                   3.000000                   0.011699   
10                  3.000000                   0.006574   
100                 2.000000                   0.040535   
...                      ...                        ...   
95                  2.000000                   0.080951   
96                  2.000000                   0.010884   
97                  2.000000                   0.014410   
98                  2.000000                   0.009703   
99                  2.000000                   0.018383   

               start_distance_to_shuttle_m  start_distance_to_metro_m  
cluster_label                                                          
-1                             2304.793950                 170.948676  
0                               554.463299                  84.665590  
1                               510.036236                  68.580518  
10                              418.148358                 288.865420  
100                             249.156334                  31.869308  
...                                    ...                        ...  
95                             5373.717928                  17.009052  
96                              917.693840                  63.428558  
97                             1650.659496                 147.881532  
98                             1511.695479                 153.192424  
99                             1095.261916                  25.830314  

[167 rows x 8 columns]

Distribution of Categorical Features per Cluster:

Feature: rideable_type
rideable_type  classic_bike  electric_bike
cluster_label                             
-1                    48.73          51.27
0                     60.51          39.49
1                     49.28          50.72
10                    50.00          50.00
100                  100.00           0.00
...                     ...            ...
95                    66.67          33.33
96                    66.67          33.33
97                    66.67          33.33
98                    40.00          60.00
99                    66.67          33.33

[167 rows x 2 columns]

Feature: member_casual
member_casual  casual  member
cluster_label                
-1              59.42   40.58
0              100.00    0.00
1              100.00    0.00
10             100.00    0.00
100              0.00  100.00
...               ...     ...
95               0.00  100.00
96               0.00  100.00
97               0.00  100.00
98              20.00   80.00
99               0.00  100.00

[167 rows x 2 columns]

Feature: weather_bin
weather_bin    Cloudy  Rainy   Sunny
cluster_label                       
-1              13.92  42.28   43.81
0               57.82  21.52   20.66
1               98.55   0.00    1.45
10              95.00   0.00    5.00
100              0.00   0.00  100.00
...               ...    ...     ...
95               0.00   0.00  100.00
96               0.00   0.00  100.00
97               0.00   0.00  100.00
98               0.00   0.00  100.00
99               0.00   0.00  100.00

[167 rows x 3 columns]

Feature: start_geohash
start_geohash  dqbvrzn  dqbvx3p  dqbvx7w  dqbvx96  dqbvxc8  dqbvxe9  dqbvxek  \
cluster_label                                                                  
-1                0.04     0.08     0.04     0.04     0.08     0.04     0.16   
0                 0.00     0.00     0.00     0.00     0.00     0.00     0.00   
1                 0.00     0.00     0.00     0.00     0.00     0.00     0.00   
10                0.00     0.00     0.00     0.00     0.00     0.00     0.00   
100               0.00     0.00     0.00     0.00     0.00     0.00     0.00   
...                ...      ...      ...      ...      ...      ...      ...   
95                0.00     0.00     0.00     0.00     0.00     0.00     0.00   
96                0.00     0.00     0.00     0.00     0.00     0.00     0.00   
97                0.00     0.00     0.00     0.00     0.00     0.00     0.00   
98                0.00     0.00     0.00     0.00     0.00     0.00     0.00   
99                0.00     0.00     0.00     0.00     0.00     0.00     0.00   

start_geohash  dqbvxeu  dqbvxfe  dqbvxgd  ...  dqcnj5x  dqcnk9e  dqcnke2  \
cluster_label                             ...                              
-1                0.04     0.08     0.04  ...     0.04     0.04     0.04   
0                 0.00     0.00     0.00  ...     0.00     0.00     0.00   
1                 0.00     0.00     0.00  ...     0.00     0.00     0.00   
10                0.00     0.00     0.00  ...     0.00     0.00     0.00   
100               0.00     0.00     0.00  ...     0.00     0.00     0.00   
...                ...      ...      ...  ...      ...      ...      ...   
95                0.00     0.00     0.00  ...     0.00     0.00     0.00   
96                0.00     0.00     0.00  ...     0.00     0.00     0.00   
97                0.00     0.00     0.00  ...     0.00     0.00     0.00   
98                0.00     0.00     0.00  ...     0.00     0.00     0.00   
99                0.00     0.00     0.00  ...     0.00     0.00     0.00   

start_geohash  dqcnkjn  dqcnndr  dqcnndt  dqcnnsm  dqcns19  dqcns48  dqcq35j  
cluster_label                                                                 
-1                0.12     0.04     0.08     0.12     0.04     0.04     0.04  
0                 0.00     0.00     0.00     0.00     0.00     0.00     0.00  
1                 0.00     0.00     0.00     0.00     0.00     0.00     0.00  
10                0.00     0.00     0.00     0.00     0.00     0.00     0.00  
100               0.00     0.00     0.00     0.00     0.00     0.00     0.00  
...                ...      ...      ...      ...      ...      ...      ...  
95                0.00     0.00     0.00     0.00     0.00     0.00     0.00  
96                0.00     0.00     0.00     0.00     0.00     0.00     0.00  
97                0.00     0.00     0.00     0.00     0.00     0.00     0.00  
98                0.00     0.00     0.00     0.00     0.00     0.00     0.00  
99                0.00     0.00     0.00     0.00     0.00     0.00     0.00  

[167 rows x 606 columns]

Feature: start_capacity_bin
start_capacity_bin  10–15 $  15–30 $  30–60 $  7–10 $
cluster_label                                        
-1                    34.85    53.65    11.25    0.24
0                      0.00    89.73    10.27    0.00
1                    100.00     0.00     0.00    0.00
10                   100.00     0.00     0.00    0.00
100                  100.00     0.00     0.00    0.00
...                     ...      ...      ...     ...
95                     0.00   100.00     0.00    0.00
96                     0.00     0.00   100.00    0.00
97                     0.00   100.00     0.00    0.00
98                     0.00     0.00   100.00    0.00
99                     0.00   100.00     0.00    0.00

[167 rows x 4 columns]

Cluster Sizes:
cluster_label
-1     2479
0       818
1        69
10       20
100       3
       ... 
95        3
96        6
97        3
98        5
99        3
Name: count, Length: 167, dtype: int64
No description has been provided for this image
PCA Loadings (Contribution of Original Features to Principal Components):
                                      PC1       PC2
temp                         1.473775e-02  0.383722
humidity                    -2.515442e-02 -0.529548
windspeed                    5.267178e-03  0.625646
ride_duration_min            7.275576e-02  0.250793
cost_category                6.636345e-02 -0.264933
distance_to_business_area    6.096214e-01 -0.026776
start_distance_to_shuttle_m  6.227339e-01 -0.025858
start_distance_to_metro_m    4.736822e-01  0.020366
rideable_type_electric_bike -1.449674e-02  0.002489
member_casual_member        -4.160469e-02  0.060349
weather_bin_Rainy           -7.180000e-03 -0.009047
weather_bin_Sunny            1.317847e-02  0.204815
start_geohash_dqbvx3p        2.398170e-03 -0.000365
start_geohash_dqbvx7w        1.081112e-03  0.000072
start_geohash_dqbvx96        1.238706e-03  0.000015
start_geohash_dqbvxc8        2.134598e-03 -0.000409
start_geohash_dqbvxe9        1.194380e-03  0.000014
start_geohash_dqbvxek        4.865869e-03 -0.000163
start_geohash_dqbvxeu        1.221426e-03  0.000229
start_geohash_dqbvxfe        2.015529e-03  0.000190
start_geohash_dqbvxgd        1.049133e-03 -0.000209
start_geohash_dqbvxs5        1.231048e-03 -0.000009
start_geohash_dqbvxsm        2.461707e-03 -0.000091
start_geohash_dqbvxsw        2.396488e-03 -0.000162
start_geohash_dqbvxts        2.633608e-03 -0.000037
start_geohash_dqbvxv0        1.193487e-03 -0.000195
start_geohash_dqbvxzb        2.820167e-03  0.000116
start_geohash_dqchbuk        7.681811e-04 -0.000093
start_geohash_dqchbv1        7.979695e-04 -0.000110
start_geohash_dqchby0        6.562643e-04 -0.000093
start_geohash_dqchbyf        7.080968e-04 -0.000231
start_geohash_dqchtzv        6.710258e-04  0.000129
start_geohash_dqchuch        3.118400e-04  0.000041
start_geohash_dqchv1g        2.512065e-04  0.000097
start_geohash_dqchv37        2.294478e-04  0.000239
start_geohash_dqchv64        2.386256e-04 -0.000244
start_geohash_dqchv8b        2.514165e-04  0.000367
start_geohash_dqchvu3        1.950884e-04 -0.000263
start_geohash_dqchvue        1.758171e-04 -0.000011
start_geohash_dqchvwn        1.566542e-04  0.000147
start_geohash_dqchvy9        1.371230e-04  0.000097
start_geohash_dqchvz1        6.943647e-04  0.000127
start_geohash_dqchvz7        1.345267e-04  0.000323
start_geohash_dqchwpv        4.885834e-04  0.000152
start_geohash_dqchwr8        3.941061e-04 -0.000014
start_geohash_dqchwrg        4.367925e-04 -0.000235
start_geohash_dqchwwg        2.745263e-04 -0.000018
start_geohash_dqchwwn        2.182080e-03 -0.000927
start_geohash_dqchwwy        4.133777e-04 -0.000037
start_geohash_dqchwxr        2.020046e-04 -0.000157
start_geohash_dqchwz3        2.641399e-04 -0.000058
start_geohash_dqchwzs        3.406984e-04  0.000042
start_geohash_dqchwzv        3.019624e-04 -0.000033
start_geohash_dqchxkn        7.947746e-04 -0.000120
start_geohash_dqchxky        1.538005e-03  0.000169
start_geohash_dqchxyb        4.542322e-04 -0.000093
start_geohash_dqchy2q        5.975283e-04 -0.000352
start_geohash_dqchy2w        6.242084e-04  0.000063
start_geohash_dqchy3t        5.814952e-04 -0.000120
start_geohash_dqchy6u        5.074584e-04 -0.000334
start_geohash_dqchy83        4.889999e-04 -0.000102
start_geohash_dqchy87        3.078158e-04 -0.000009
start_geohash_dqchy8m        7.288436e-04 -0.000030
start_geohash_dqchy9g        1.755974e-03 -0.000403
start_geohash_dqchy9t        1.599728e-04 -0.000135
start_geohash_dqchy9y        2.119949e-04  0.000045
start_geohash_dqchyb1        1.118963e-03  0.000665
start_geohash_dqchyb4        7.544982e-04 -0.000040
start_geohash_dqchyc3        5.287096e-04  0.000372
start_geohash_dqchycg        4.168210e-04 -0.000210
start_geohash_dqchydb        4.993339e-04 -0.000098
start_geohash_dqchydx        1.493128e-04 -0.000193
start_geohash_dqchye5        1.042088e-03 -0.000101
start_geohash_dqchyeq        3.027016e-04 -0.000005
start_geohash_dqchyer        1.880741e-04  0.000182
start_geohash_dqchykp        1.467629e-04 -0.000072
start_geohash_dqchymq        2.962826e-04 -0.000236
start_geohash_dqchypz        3.516381e-04  0.000366
start_geohash_dqchyqu        8.731297e-05  0.000072
start_geohash_dqchyr5        8.211161e-05  0.000062
start_geohash_dqchysx        1.640613e-04 -0.000180
start_geohash_dqchytn        5.914669e-04  0.000115
start_geohash_dqchytr        2.270930e-04 -0.000249
start_geohash_dqchyw0        4.976008e-04 -0.000487
start_geohash_dqchyxj        3.478416e-04  0.000133
start_geohash_dqchyxm        3.473857e-04  0.000189
start_geohash_dqchyxs        2.923656e-04 -0.000034
start_geohash_dqchzth        6.739836e-05  0.000151
start_geohash_dqcj0bk        7.473412e-04 -0.000294
start_geohash_dqcj1eu        5.718829e-04  0.000047
start_geohash_dqcj1se        1.169850e-03  0.000037
start_geohash_dqcj1tm        1.115998e-03 -0.000538
start_geohash_dqcj4ey        9.670979e-04 -0.000401
start_geohash_dqcj4s4        4.997773e-04 -0.000121
start_geohash_dqcj4sd        5.189617e-04  0.000010
start_geohash_dqcj4tb        5.234895e-04 -0.000004
start_geohash_dqcj4td        5.025708e-04 -0.000224
start_geohash_dqcj4v8        4.922787e-04 -0.000238
start_geohash_dqcj4wk        1.852601e-03  0.000106
start_geohash_dqcj5j3        4.267466e-04 -0.000131
start_geohash_dqcj5mk        3.585692e-04  0.000050
start_geohash_dqcj5vb        1.012884e-03  0.000476
start_geohash_dqcj5vv        3.185863e-04  0.000185
start_geohash_dqcj5xs        3.073348e-04 -0.000218
start_geohash_dqcj5yg        2.635110e-04 -0.000164
start_geohash_dqcj5ys        1.118831e-03 -0.000275
start_geohash_dqcj5z1        2.756931e-04 -0.000013
start_geohash_dqcj6em        9.891087e-04 -0.000097
start_geohash_dqcj6uz        4.694017e-04  0.000077
start_geohash_dqcj6v1        4.053152e-04 -0.000085
start_geohash_dqcj6v2        8.394460e-04 -0.000364
start_geohash_dqcj6y9        4.856870e-04  0.000037
start_geohash_dqcj6yk        9.489153e-04 -0.000065
start_geohash_dqcj6z6        5.362408e-04 -0.000021
start_geohash_dqcj71s        3.896685e-04 -0.000066
start_geohash_dqcj7jf        3.811973e-04 -0.000203
start_geohash_dqcj845        8.212473e-04 -0.000097
start_geohash_dqcj861        1.878261e-03  0.001363
start_geohash_dqcjhkd        2.389620e-04  0.000119
start_geohash_dqcjhph        5.057113e-04 -0.000297
start_geohash_dqcjhqb        1.217906e-03 -0.000005
start_geohash_dqcjhvj        4.309662e-04  0.000102
start_geohash_dqcjhwf        2.081433e-04  0.000111
start_geohash_dqcjhwp        6.092856e-04 -0.000380
start_geohash_dqcjhzm        2.646957e-04 -0.000071
start_geohash_dqcjj1h        1.660411e-04  0.000125
start_geohash_dqcjj39        1.000524e-03 -0.000039
start_geohash_dqcjj3d        1.365617e-04 -0.000030
start_geohash_dqcjj6p        1.227731e-04 -0.000136
start_geohash_dqcjj7y        1.419961e-04 -0.000031
start_geohash_dqcjj82        5.021413e-05 -0.000044
start_geohash_dqcjj8j        2.680392e-04 -0.000283
start_geohash_dqcjjbh        7.786307e-05 -0.000058
start_geohash_dqcjjdq        1.812868e-04 -0.000084
start_geohash_dqcjjeu        2.680639e-04 -0.000325
start_geohash_dqcjjf9        6.385998e-05 -0.000053
start_geohash_dqcjjfz        6.146228e-05  0.000234
start_geohash_dqcjjgn        1.551323e-04  0.000061
start_geohash_dqcjjjd        2.455368e-04  0.000126
start_geohash_dqcjjjz        1.243995e-04 -0.000050
start_geohash_dqcjjk7        2.213994e-04  0.000237
start_geohash_dqcjjkv        1.535454e-04 -0.000188
start_geohash_dqcjjm2        3.158521e-04  0.000024
start_geohash_dqcjjm5        2.266157e-04 -0.000274
start_geohash_dqcjjmg        8.044290e-04 -0.000134
start_geohash_dqcjjnt        1.158904e-04  0.000165
start_geohash_dqcjjq0        1.449634e-03  0.001529
start_geohash_dqcjjq2        4.102157e-04 -0.000260
start_geohash_dqcjjq6        1.042365e-04 -0.000105
start_geohash_dqcjjqe        5.436979e-04  0.000542
start_geohash_dqcjjqr        6.640794e-04 -0.000411
start_geohash_dqcjjr3        2.101405e-04  0.000180
start_geohash_dqcjjuc        8.770218e-05  0.000053
start_geohash_dqcjjuw        4.531730e-04 -0.000377
start_geohash_dqcjjw9        3.526708e-04 -0.000171
start_geohash_dqcjjwg        4.706690e-04  0.000184
start_geohash_dqcjjwy        6.655027e-04  0.001565
start_geohash_dqcjjxh        1.068689e-03 -0.000237
start_geohash_dqcjjxq        2.274356e-04  0.000017
start_geohash_dqcjjxx        4.607847e-04 -0.000514
start_geohash_dqcjjy0        3.705963e-04 -0.000381
start_geohash_dqcjjy5        2.545475e-04 -0.000554
start_geohash_dqcjjyf        1.276726e-04  0.000103
start_geohash_dqcjjzc        5.894730e-05 -0.000251
start_geohash_dqcjjzh        2.271638e-04 -0.000357
start_geohash_dqcjjzu        2.432277e-04 -0.000126
start_geohash_dqcjjzz        1.592483e-04 -0.000176
start_geohash_dqcjkfm        1.655164e-04 -0.000131
start_geohash_dqcjm1h        3.319840e-04 -0.000226
start_geohash_dqcjm2y        2.541988e-05  0.000101
start_geohash_dqcjm8e        2.929376e-05 -0.000003
start_geohash_dqcjm8x        3.294295e-04 -0.000842
start_geohash_dqcjmb7        9.050119e-05 -0.000049
start_geohash_dqcjmbc        8.724185e-05 -0.000117
start_geohash_dqcjmbg        2.424257e-04 -0.000359
start_geohash_dqcjmbh        1.984614e-04  0.000058
start_geohash_dqcjmbn        1.274706e-05  0.000208
start_geohash_dqcjmcp       -6.393776e-05  0.000202
start_geohash_dqcjmgk       -5.031578e-05  0.000182
start_geohash_dqcjmqq       -2.518678e-05  0.000362
start_geohash_dqcjmss        1.009443e-04 -0.000133
start_geohash_dqcjmu8        8.136443e-05  0.000325
start_geohash_dqcjmze       -9.451252e-07  0.000113
start_geohash_dqcjn05        4.080070e-04 -0.000013
start_geohash_dqcjn2y        4.073061e-05  0.000096
start_geohash_dqcjn3z        2.852370e-04  0.000198
start_geohash_dqcjn56        3.581129e-04  0.000022
start_geohash_dqcjn5t        5.374516e-05 -0.000211
start_geohash_dqcjn6h        4.117995e-04 -0.000282
start_geohash_dqcjn6u        1.667665e-04  0.000121
start_geohash_dqcjn6w        2.595637e-04 -0.000398
start_geohash_dqcjn8j        3.386800e-04 -0.000042
start_geohash_dqcjn8s        1.626415e-04 -0.000013
start_geohash_dqcjn8y        2.549463e-04 -0.000097
start_geohash_dqcjn97        2.536706e-04  0.000102
start_geohash_dqcjn9u        4.417608e-04 -0.000119
start_geohash_dqcjn9w        3.229517e-04 -0.000184
start_geohash_dqcjnbb        9.664149e-05  0.000123
start_geohash_dqcjnd5        4.291358e-04  0.000288
start_geohash_dqcjndf        4.123165e-04  0.000600
start_geohash_dqcjndq        4.998741e-04  0.000197
start_geohash_dqcjne0        8.746006e-05  0.000059
start_geohash_dqcjnem        1.085604e-04 -0.000079
start_geohash_dqcjngj        3.072502e-03  0.000733
start_geohash_dqcjnp7        1.071649e-05 -0.000012
start_geohash_dqcjnpg        1.112874e-05 -0.000322
start_geohash_dqcjnpw        7.260530e-05  0.000160
start_geohash_dqcjnqd        7.180358e-05 -0.000444
start_geohash_dqcjnu4        8.967613e-04  0.000064
start_geohash_dqcjnvz        3.045952e-03  0.001145
start_geohash_dqcjnxq        5.836633e-04  0.002781
start_geohash_dqcjnxy       -9.145158e-04  0.001195
start_geohash_dqcjny8        1.124962e-03  0.000090
start_geohash_dqcjnzq        7.557045e-04  0.001245
start_geohash_dqcjp9n       -1.415618e-05  0.000135
start_geohash_dqcjpdb       -8.133682e-05 -0.000156
start_geohash_dqcjpe6        2.932359e-04  0.000358
start_geohash_dqcjpfw       -8.287191e-06  0.000139
start_geohash_dqcjpjs        6.297792e-04  0.001472
start_geohash_dqcjpjz       -6.935260e-04 -0.000031
start_geohash_dqcjpkf        1.492549e-05  0.000219
start_geohash_dqcjpkt       -2.879410e-04  0.000295
start_geohash_dqcjpm7       -1.664475e-04  0.000181
start_geohash_dqcjpm9       -8.983914e-04  0.000823
start_geohash_dqcjpmm       -1.223862e-03 -0.001725
start_geohash_dqcjpnx       -1.212108e-04  0.000126
start_geohash_dqcjpp6       -2.024898e-03  0.001007
start_geohash_dqcjppj       -1.202162e-04 -0.000333
start_geohash_dqcjppp       -9.826058e-05  0.000633
start_geohash_dqcjppt       -5.561338e-04  0.000755
start_geohash_dqcjpqd       -3.741136e-04  0.000404
start_geohash_dqcjpqj       -9.366069e-04  0.000905
start_geohash_dqcjpqt       -6.297269e-04  0.000183
start_geohash_dqcjpr4       -2.542824e-04 -0.000031
start_geohash_dqcjprj       -5.089604e-04  0.000148
start_geohash_dqcjprv       -7.023958e-04 -0.000454
start_geohash_dqcjps1       -2.953583e-05  0.000127
start_geohash_dqcjps4        6.173270e-05  0.000368
start_geohash_dqcjpsm        4.770236e-04  0.000087
start_geohash_dqcjpt1       -1.233134e-04  0.000589
start_geohash_dqcjptf       -7.222995e-06 -0.000193
start_geohash_dqcjptn       -3.600006e-04  0.000116
start_geohash_dqcjptp       -9.133223e-04 -0.000035
start_geohash_dqcjptq       -4.270749e-04 -0.000549
start_geohash_dqcjptt       -1.348827e-03  0.001097
start_geohash_dqcjpty       -2.926133e-04  0.000342
start_geohash_dqcjpub       -2.371617e-04  0.000240
start_geohash_dqcjpv2       -2.598923e-04  0.000060
start_geohash_dqcjpv3       -5.006423e-04 -0.000090
start_geohash_dqcjpvq       -2.157272e-04 -0.000027
start_geohash_dqcjpvv       -9.989419e-05  0.000238
start_geohash_dqcjpwv       -3.923693e-05  0.000329
start_geohash_dqcjpx0       -5.558488e-04 -0.000318
start_geohash_dqcjpy1        8.111907e-04  0.000598
start_geohash_dqcjpyb       -1.678548e-04 -0.000515
start_geohash_dqcjpyt       -4.805089e-04 -0.001193
start_geohash_dqcjpz0       -5.527860e-04 -0.000180
start_geohash_dqcjpzd       -2.366570e-04  0.000508
start_geohash_dqcjpzh        1.887370e-05  0.000618
start_geohash_dqcjpzt       -3.381302e-04 -0.000152
start_geohash_dqcjq00        3.721463e-05 -0.000107
start_geohash_dqcjq03       -1.085763e-05  0.000151
start_geohash_dqcjq06       -1.131262e-04 -0.000114
start_geohash_dqcjq0q       -6.296098e-05  0.000607
start_geohash_dqcjq0s       -1.341828e-04  0.000187
start_geohash_dqcjq0t        3.473016e-05  0.000239
start_geohash_dqcjq0y       -2.898315e-04  0.000515
start_geohash_dqcjq13       -1.178021e-05 -0.000145
start_geohash_dqcjq1j       -1.066764e-04 -0.000079
start_geohash_dqcjq2c        6.701685e-04  0.000484
start_geohash_dqcjq3z       -1.283652e-05  0.000105
start_geohash_dqcjq4y       -6.611111e-04  0.001446
start_geohash_dqcjq5u       -7.141845e-04  0.001124
start_geohash_dqcjq61       -6.757744e-05  0.000830
start_geohash_dqcjq6d       -1.088247e-03  0.000559
start_geohash_dqcjq6h       -5.979537e-05 -0.001034
start_geohash_dqcjq6n       -2.173926e-04  0.000904
start_geohash_dqcjq7f       -5.353536e-05  0.001180
start_geohash_dqcjq7h       -1.319559e-03 -0.000224
start_geohash_dqcjq8n       -3.197555e-04 -0.000247
start_geohash_dqcjq8y       -4.235341e-04  0.000362
start_geohash_dqcjq91        8.849608e-05  0.000688
start_geohash_dqcjq95       -5.897331e-04 -0.000272
start_geohash_dqcjq9q       -1.198430e-04  0.000618
start_geohash_dqcjq9v       -7.109178e-04 -0.000025
start_geohash_dqcjq9w       -5.377742e-04  0.000398
start_geohash_dqcjqb4       -1.858270e-04  0.000233
start_geohash_dqcjqb6       -6.809104e-05 -0.000076
start_geohash_dqcjqb9       -1.557555e-04  0.000525
start_geohash_dqcjqbb       -3.137713e-04  0.000341
start_geohash_dqcjqbc       -7.443359e-05  0.000138
start_geohash_dqcjqbd       -4.848877e-05  0.000072
start_geohash_dqcjqbk       -1.702803e-04 -0.000206
start_geohash_dqcjqbv       -1.415084e-04 -0.000006
start_geohash_dqcjqc0       -2.590600e-04  0.000672
start_geohash_dqcjqc4       -2.461537e-04  0.000469
start_geohash_dqcjqc8       -8.738265e-04  0.000642
start_geohash_dqcjqcb       -4.097430e-04  0.000020
start_geohash_dqcjqce       -2.850870e-04 -0.000546
start_geohash_dqcjqcf       -4.383317e-04  0.000364
start_geohash_dqcjqcj       -3.557642e-04 -0.000339
start_geohash_dqcjqck       -9.793864e-04  0.000836
start_geohash_dqcjqcy       -4.636061e-04 -0.000576
start_geohash_dqcjqd2       -1.440998e-03 -0.000681
start_geohash_dqcjqd5       -1.114348e-03  0.000285
start_geohash_dqcjqdm       -7.469649e-04 -0.000177
start_geohash_dqcjqdt       -2.029994e-05 -0.000390
start_geohash_dqcjqdx       -1.101118e-03  0.000065
start_geohash_dqcjqen       -6.261968e-04 -0.000129
start_geohash_dqcjqf1       -4.687449e-04  0.000408
start_geohash_dqcjqf5       -4.242070e-04  0.000566
start_geohash_dqcjqfk       -9.172859e-04  0.000546
start_geohash_dqcjqfn       -4.327456e-04  0.001394
start_geohash_dqcjqfp       -1.558052e-04  0.000166
start_geohash_dqcjqft       -4.888955e-04  0.001021
start_geohash_dqcjqfy       -9.956495e-05 -0.000785
start_geohash_dqcjqg1       -1.342450e-03 -0.000597
start_geohash_dqcjqgc       -1.532665e-03 -0.000646
start_geohash_dqcjqge       -4.598635e-04 -0.001256
start_geohash_dqcjqgg       -7.181981e-05 -0.000427
start_geohash_dqcjqgn       -4.683266e-04 -0.000588
start_geohash_dqcjqgq       -1.843417e-03  0.000336
start_geohash_dqcjqgw       -6.484885e-04  0.000301
start_geohash_dqcjqgz       -7.923527e-04  0.000828
start_geohash_dqcjqjd       -2.720006e-04 -0.000103
start_geohash_dqcjqjx        1.864544e-05 -0.000531
start_geohash_dqcjqk8       -4.437244e-04  0.000418
start_geohash_dqcjqn0       -6.086679e-05  0.000243
start_geohash_dqcjqp4       -6.860477e-05 -0.000417
start_geohash_dqcjqpv       -2.413525e-04  0.000841
start_geohash_dqcjqtu       -8.425277e-04 -0.000355
start_geohash_dqcjqtv       -3.477039e-04 -0.000214
start_geohash_dqcjqtx       -1.735766e-04  0.000286
start_geohash_dqcjqu3       -4.686913e-04 -0.000102
start_geohash_dqcjquc       -1.525341e-04 -0.000570
start_geohash_dqcjquq       -1.485822e-03 -0.000006
start_geohash_dqcjquu       -2.356440e-03 -0.002065
start_geohash_dqcjqv6       -6.286043e-04  0.001129
start_geohash_dqcjqve       -1.601024e-03 -0.002590
start_geohash_dqcjqvu       -1.332588e-03 -0.000062
start_geohash_dqcjqx1       -6.005562e-04  0.000327
start_geohash_dqcjqx8       -2.272340e-04 -0.000061
start_geohash_dqcjqyd       -3.458692e-04 -0.000188
start_geohash_dqcjqyr       -1.467377e-03 -0.000474
start_geohash_dqcjqyz       -6.181996e-04  0.000080
start_geohash_dqcjqzq       -9.093878e-04 -0.000052
start_geohash_dqcjqzu        3.890018e-05  0.000167
start_geohash_dqcjr04       -7.423522e-04 -0.000277
start_geohash_dqcjr0e       -4.444542e-04 -0.000569
start_geohash_dqcjr0f       -1.959663e-04  0.000040
start_geohash_dqcjr0r       -1.885087e-04  0.000092
start_geohash_dqcjr0z       -4.145895e-04  0.000221
start_geohash_dqcjr15       -6.254817e-04  0.000057
start_geohash_dqcjr16       -4.557842e-04  0.000960
start_geohash_dqcjr17       -2.320338e-04  0.000372
start_geohash_dqcjr19       -5.734998e-04 -0.000591
start_geohash_dqcjr1c       -5.207606e-04  0.000248
start_geohash_dqcjr1n       -9.473773e-04 -0.000212
start_geohash_dqcjr1p       -3.420158e-04 -0.000083
start_geohash_dqcjr1s       -3.765516e-04  0.000144
start_geohash_dqcjr1t       -5.924234e-04  0.000687
start_geohash_dqcjr1z       -4.381796e-04  0.000348
start_geohash_dqcjr22       -3.024926e-04  0.001308
start_geohash_dqcjr29       -3.297446e-04  0.000400
start_geohash_dqcjr2e       -2.261471e-04  0.000124
start_geohash_dqcjr2w       -1.817512e-04 -0.000045
start_geohash_dqcjr30       -2.316954e-04  0.000290
start_geohash_dqcjr33       -6.823286e-04 -0.000515
start_geohash_dqcjr34       -6.553971e-04  0.000019
start_geohash_dqcjr3h       -2.575163e-04  0.000579
start_geohash_dqcjr3r       -2.436740e-04 -0.000100
start_geohash_dqcjr3s       -4.976425e-04  0.000174
start_geohash_dqcjr40       -3.409280e-04  0.000274
start_geohash_dqcjr45       -7.973528e-04  0.001341
start_geohash_dqcjr49       -2.073745e-04  0.000614
start_geohash_dqcjr4d       -1.233599e-03 -0.000198
start_geohash_dqcjr4j       -3.917400e-04 -0.000532
start_geohash_dqcjr4w       -1.616738e-03 -0.001410
start_geohash_dqcjr53       -2.510312e-03 -0.001107
start_geohash_dqcjr54       -1.752365e-03  0.000411
start_geohash_dqcjr5e       -7.818403e-04 -0.000173
start_geohash_dqcjr5g       -1.876519e-03 -0.002704
start_geohash_dqcjr5h       -9.293022e-04  0.001141
start_geohash_dqcjr5n       -8.736780e-04 -0.001225
start_geohash_dqcjr64       -1.081437e-03 -0.000314
start_geohash_dqcjr6b       -7.405456e-04  0.000623
start_geohash_dqcjr6d       -1.352707e-03 -0.001315
start_geohash_dqcjr6h       -2.013586e-03 -0.000178
start_geohash_dqcjr6z       -7.837822e-04  0.000658
start_geohash_dqcjr71       -1.272294e-03  0.000941
start_geohash_dqcjr7c       -6.840517e-04 -0.000380
start_geohash_dqcjr7v       -5.470992e-04  0.000304
start_geohash_dqcjr80       -1.075503e-03 -0.000333
start_geohash_dqcjr8x       -1.200393e-03 -0.000565
start_geohash_dqcjr91       -1.075878e-03 -0.000147
start_geohash_dqcjr95       -7.041566e-04 -0.001543
start_geohash_dqcjr97       -5.374598e-04 -0.000039
start_geohash_dqcjr99       -6.452289e-04  0.000454
start_geohash_dqcjr9n       -2.586702e-03 -0.003495
start_geohash_dqcjr9r       -9.755865e-04 -0.000683
start_geohash_dqcjr9u       -3.007900e-04 -0.000779
start_geohash_dqcjr9y       -1.813834e-03 -0.000353
start_geohash_dqcjrb5       -3.825163e-04 -0.001013
start_geohash_dqcjrbj       -2.171983e-04 -0.000082
start_geohash_dqcjrbt       -4.113281e-04 -0.000011
start_geohash_dqcjrbz       -5.105964e-04  0.000410
start_geohash_dqcjrc8       -1.209915e-03 -0.001179
start_geohash_dqcjrce       -1.263998e-03  0.000193
start_geohash_dqcjrcn       -7.430203e-04  0.000737
start_geohash_dqcjrct       -7.983318e-04 -0.000028
start_geohash_dqcjrcv       -3.369988e-05  0.000121
start_geohash_dqcjrcx       -2.568467e-04 -0.000321
start_geohash_dqcjrd0       -4.931551e-04  0.000218
start_geohash_dqcjrd1       -2.846111e-04  0.000686
start_geohash_dqcjrdw       -1.505854e-04 -0.001060
start_geohash_dqcjre1        2.669464e-04 -0.000058
start_geohash_dqcjreb       -8.306416e-04  0.000359
start_geohash_dqcjreg       -5.815801e-04 -0.000007
start_geohash_dqcjrew       -4.587242e-04  0.001034
start_geohash_dqcjrf5       -4.795231e-04  0.000055
start_geohash_dqcjrf8       -6.579631e-04 -0.000321
start_geohash_dqcjrfb       -4.202024e-04  0.000270
start_geohash_dqcjrfc       -1.357766e-03 -0.000884
start_geohash_dqcjrff       -1.059754e-03  0.000027
start_geohash_dqcjrfq       -1.015515e-04 -0.000149
start_geohash_dqcjrfu        2.024783e-05 -0.000927
start_geohash_dqcjrg8       -4.055040e-04  0.000273
start_geohash_dqcjrhc       -6.004489e-04 -0.000180
start_geohash_dqcjrhg       -1.561892e-03 -0.001110
start_geohash_dqcjrhn       -7.051932e-04  0.000579
start_geohash_dqcjrhs       -8.603771e-04  0.000028
start_geohash_dqcjrht       -5.267803e-04 -0.000363
start_geohash_dqcjrhx       -6.385425e-04 -0.000518
start_geohash_dqcjrhy       -3.858877e-04 -0.000094
start_geohash_dqcjrj7       -1.743836e-03 -0.000565
start_geohash_dqcjrjb       -5.209804e-04  0.000328
start_geohash_dqcjrjp       -4.031999e-04 -0.000416
start_geohash_dqcjrjw       -4.157110e-04  0.000303
start_geohash_dqcjrk4       -1.271378e-03  0.000034
start_geohash_dqcjrk6       -7.670284e-04 -0.000367
start_geohash_dqcjrkc       -8.114822e-04 -0.000334
start_geohash_dqcjrkw       -3.261063e-04  0.000566
start_geohash_dqcjrmm       -3.062316e-05  0.000431
start_geohash_dqcjrn4       -4.360254e-04 -0.000855
start_geohash_dqcjrn6       -4.177229e-04 -0.000205
start_geohash_dqcjrnd       -9.789955e-04  0.000801
start_geohash_dqcjrnn       -9.491471e-04 -0.001862
start_geohash_dqcjrny       -6.807397e-04 -0.000432
start_geohash_dqcjrp5       -1.759941e-04 -0.000447
start_geohash_dqcjrp6       -3.505790e-04 -0.000114
start_geohash_dqcjrpf       -7.083400e-04  0.000777
start_geohash_dqcjrpj       -5.846667e-04 -0.000116
start_geohash_dqcjrq4       -6.978080e-05 -0.000009
start_geohash_dqcjrq9       -6.190931e-04  0.000239
start_geohash_dqcjrr3       -1.194816e-04 -0.000447
start_geohash_dqcjrs3       -1.182366e-03 -0.001200
start_geohash_dqcjrsh       -2.039854e-04 -0.000592
start_geohash_dqcjrsp       -2.419018e-04  0.000224
start_geohash_dqcjrsy       -1.886550e-04 -0.000081
start_geohash_dqcjruc       -2.821231e-04 -0.000947
start_geohash_dqcjrvb        2.153306e-04  0.000264
start_geohash_dqcjrvm       -7.010644e-05  0.000379
start_geohash_dqcjrvt       -1.233555e-04  0.000594
start_geohash_dqcjrvx       -4.543749e-05 -0.000450
start_geohash_dqcjrwc       -6.377608e-05 -0.000437
start_geohash_dqcjrys       -1.218570e-04 -0.000443
start_geohash_dqcjrzm       -4.232365e-04 -0.000241
start_geohash_dqcjrzr       -1.142773e-04  0.000148
start_geohash_dqcjrzv        2.151125e-05  0.000080
start_geohash_dqcjt2h        3.792841e-05  0.000011
start_geohash_dqcjtb4       -1.773531e-04 -0.001361
start_geohash_dqcjtbd       -1.211754e-04  0.000615
start_geohash_dqcjtfx       -4.906236e-05 -0.000022
start_geohash_dqcjtmr        8.566523e-07 -0.000139
start_geohash_dqcjtu9        6.841160e-05 -0.000093
start_geohash_dqcjtug        7.477923e-05 -0.000005
start_geohash_dqcjtuh       -2.043965e-06  0.000200
start_geohash_dqcjtxt        1.325626e-04  0.000137
start_geohash_dqcjv7d        3.352485e-04  0.000105
start_geohash_dqcjv87        1.343533e-03  0.000105
start_geohash_dqcjv8n        2.227658e-05 -0.000096
start_geohash_dqcjv8r        1.300167e-04  0.000175
start_geohash_dqcjv8v        9.186056e-04  0.000564
start_geohash_dqcjv9f        2.946499e-04 -0.000137
start_geohash_dqcjv9s        2.088087e-04 -0.000082
start_geohash_dqcjvd1        1.532275e-04 -0.000246
start_geohash_dqcjvd3        9.479638e-04  0.000251
start_geohash_dqcjvee        1.314018e-04 -0.000129
start_geohash_dqcjw0e       -7.191469e-05  0.000444
start_geohash_dqcjw14        1.007421e-05 -0.000012
start_geohash_dqcjw16       -1.985892e-05  0.000487
start_geohash_dqcjw1b       -3.583910e-06 -0.000046
start_geohash_dqcjw2p       -2.285386e-04 -0.000288
start_geohash_dqcjw3e       -1.654710e-05 -0.000502
start_geohash_dqcjw3j       -5.428541e-05 -0.000037
start_geohash_dqcjw40       -7.838657e-05  0.000058
start_geohash_dqcjw5x        1.495808e-04  0.000873
start_geohash_dqcjw64       -3.454026e-05 -0.000282
start_geohash_dqcjwcn        1.050396e-04 -0.000262
start_geohash_dqcjwj7        2.991928e-05  0.000411
start_geohash_dqcjwm2        2.836645e-05  0.000243
start_geohash_dqcjx06       -3.377996e-05  0.000602
start_geohash_dqcjx0n        1.899799e-04  0.000355
start_geohash_dqcjx16        7.306319e-05 -0.000050
start_geohash_dqcjx20       -3.897340e-04 -0.000649
start_geohash_dqcjx2k       -1.295446e-04  0.000371
start_geohash_dqcjx30       -5.359563e-05 -0.000165
start_geohash_dqcjx3h       -4.563126e-05 -0.000238
start_geohash_dqcjx44        8.504543e-05  0.000336
start_geohash_dqcjx4w        5.498082e-05 -0.000077
start_geohash_dqcjx5f        1.032321e-04 -0.000242
start_geohash_dqcjx5r        3.475885e-05  0.000273
start_geohash_dqcjx5v        3.104740e-05  0.000191
start_geohash_dqcjx66        1.492303e-04  0.000121
start_geohash_dqcjx7g        6.980985e-05  0.000079
start_geohash_dqcjxbr       -2.309845e-04 -0.000129
start_geohash_dqcjxdb       -4.027363e-05 -0.000211
start_geohash_dqcjxg0        9.320545e-05 -0.001337
start_geohash_dqcjxgb       -3.522698e-05  0.000020
start_geohash_dqcjxhw        3.113568e-04 -0.000041
start_geohash_dqcjxjy        9.582251e-05 -0.000173
start_geohash_dqcjxn1        1.652488e-04  0.000117
start_geohash_dqcjxpq        3.272189e-04  0.000324
start_geohash_dqcjxqn        1.884178e-04  0.000174
start_geohash_dqcjxrn        1.305353e-03  0.000215
start_geohash_dqcjxs0        1.417014e-04 -0.000097
start_geohash_dqcjxut        4.011064e-05 -0.000246
start_geohash_dqcjxx4        2.133402e-04  0.000053
start_geohash_dqcjxxt        1.351975e-04 -0.000055
start_geohash_dqcjxzj        1.938445e-04  0.000126
start_geohash_dqcjyfy        1.523679e-04  0.000071
start_geohash_dqcjz1h        1.869789e-05  0.000005
start_geohash_dqcjz31        1.632493e-04  0.000023
start_geohash_dqcjz4s        1.388619e-04 -0.000096
start_geohash_dqcjz51        1.936096e-04  0.000130
start_geohash_dqcjz5p        2.862590e-04  0.000296
start_geohash_dqcjz6n        2.208044e-04 -0.000073
start_geohash_dqcjz8v        3.436546e-04 -0.000385
start_geohash_dqcjzc6        6.772365e-04  0.000523
start_geohash_dqckbp7        1.540982e-04  0.000195
start_geohash_dqckbqs        1.003093e-04  0.000162
start_geohash_dqckbrk        4.121073e-05  0.000024
start_geohash_dqcm01m        7.103337e-05  0.000031
start_geohash_dqcm04q        3.762562e-05  0.000268
start_geohash_dqcm05d       -2.038756e-05  0.000161
start_geohash_dqcm05m       -1.540153e-05 -0.000154
start_geohash_dqcm0d8        6.458140e-05  0.000124
start_geohash_dqcm0ez        1.134896e-04 -0.000196
start_geohash_dqcm0hp        2.164264e-05 -0.000128
start_geohash_dqcm0hw        7.757184e-05  0.000203
start_geohash_dqcm0ju       -1.576846e-04  0.000494
start_geohash_dqcm0kx        3.228386e-05 -0.000053
start_geohash_dqcm0mx        4.582464e-04  0.000738
start_geohash_dqcm0n3       -2.071447e-04  0.000140
start_geohash_dqcm0n7       -1.330126e-05 -0.000236
start_geohash_dqcm0nb        3.441668e-04 -0.000055
start_geohash_dqcm0ns        3.931534e-04 -0.000137
start_geohash_dqcm0pf       -4.395849e-04 -0.000366
start_geohash_dqcm0pn       -3.077699e-04  0.000250
start_geohash_dqcm0pw       -3.510094e-04 -0.000294
start_geohash_dqcm0q2       -4.647856e-05  0.000258
start_geohash_dqcm0qf       -1.561026e-04  0.000141
start_geohash_dqcm0re       -2.484681e-04  0.000127
start_geohash_dqcm0xy        2.745498e-05 -0.000041
start_geohash_dqcm1r8        6.981231e-05  0.000315
start_geohash_dqcm1w8        1.558219e-04 -0.000160
start_geohash_dqcm20e       -1.938104e-04 -0.000003
start_geohash_dqcm20q       -1.305344e-04 -0.000087
start_geohash_dqcm21d       -1.366017e-04  0.000548
start_geohash_dqcm21n       -4.458867e-04 -0.000340
start_geohash_dqcm21w       -2.354232e-04 -0.000293
start_geohash_dqcm22s       -8.065162e-05 -0.000202
start_geohash_dqcm231       -1.608145e-04 -0.000189
start_geohash_dqcm23c       -3.080371e-04 -0.001027
start_geohash_dqcm245       -4.134351e-05 -0.000107
start_geohash_dqcm24g       -1.517137e-04 -0.000008
start_geohash_dqcm24w       -1.080201e-04  0.000027
start_geohash_dqcm25s        1.720326e-05 -0.000155
start_geohash_dqcm27y        2.365645e-04  0.000723
start_geohash_dqcm28c        2.343347e-05  0.000067
start_geohash_dqcm290       -3.736766e-05  0.000101
start_geohash_dqcm2hq       -3.175573e-05  0.000518
start_geohash_dqcm2ju       -2.197734e-05  0.000047
start_geohash_dqcm2n8       -3.272044e-05 -0.000796
start_geohash_dqcm2p8       -6.428822e-05  0.000318
start_geohash_dqcm2r9       -7.727885e-05 -0.000066
start_geohash_dqcm2rn        1.184983e-04 -0.000160
start_geohash_dqcm2w5        4.276470e-06 -0.000042
start_geohash_dqcm318       -7.864146e-06  0.000046
start_geohash_dqcm375        1.231010e-04  0.000121
start_geohash_dqcm5z9        4.403056e-04  0.000312
start_geohash_dqcm737        4.321733e-04  0.000024
start_geohash_dqcm813        8.578056e-06 -0.000201
start_geohash_dqcm830        3.642437e-05 -0.000224
start_geohash_dqcm839        4.561588e-05 -0.000020
start_geohash_dqcm8be        8.694923e-05 -0.000029
start_geohash_dqcm8e9        4.448160e-04  0.000411
start_geohash_dqcm8er        2.569978e-04 -0.000407
start_geohash_dqcm8q8        1.780363e-04 -0.000019
start_geohash_dqcm8v7        2.765322e-04 -0.000208
start_geohash_dqcm8y5        1.658995e-04  0.000069
start_geohash_dqcm95t        1.528562e-04 -0.000232
start_geohash_dqcm9h3        1.495233e-04  0.000165
start_geohash_dqcm9kc        5.071821e-04  0.000457
start_geohash_dqcm9np        2.072013e-04 -0.000047
start_geohash_dqcmfeg        4.021840e-04  0.000168
start_geohash_dqcnhuw        1.065863e-03 -0.000245
start_geohash_dqcnj5x        3.568539e-04 -0.000212
start_geohash_dqcnk9e        4.937358e-04 -0.000018
start_geohash_dqcnke2        4.788696e-04 -0.000036
start_geohash_dqcnkjn        1.496524e-03 -0.000410
start_geohash_dqcnndr        2.004154e-04 -0.000139
start_geohash_dqcnndt        1.042013e-03 -0.000305
start_geohash_dqcnnsm        8.840180e-04 -0.000335
start_geohash_dqcns19        5.643528e-04  0.000132
start_geohash_dqcns48        6.884501e-04  0.000061
start_geohash_dqcq35j        4.640535e-04 -0.000229
start_capacity_bin_15–30 $  -5.198665e-02  0.010980
start_capacity_bin_30–60 $  -2.088911e-02 -0.007563
start_capacity_bin_7–10 $    4.112017e-04 -0.000407
In [ ]:
n_clusters = 3 
agg_clustering = AgglomerativeClustering(n_clusters=n_clusters, metric='euclidean', linkage='ward')
cluster_labels = agg_clustering.fit_predict(X_processed)
algo_name = "AgglomerativeClustering"


grouped_sample['cluster_label'] = cluster_labels
grouped_sample['cluster_label'] = grouped_sample['cluster_label'].astype(str) 


if len(set(cluster_labels)) > 1: 
    sil_score = silhouette_score(X_processed, cluster_labels)
    print(f"{algo_name} - Silhouette Score = {sil_score:.4f}")
else:
    print(f"{algo_name} - عدد العناقيد غير كافٍ، لا يمكن حساب Silhouette Score.")



print(f"\n--- Cluster Description for {algo_name} ---")

print("\nAverage Values of Numerical Features per Cluster:")
numerical_summary = grouped_sample.groupby('cluster_label')[numeric_features].mean()
print(numerical_summary)

print("\nDistribution of Categorical Features per Cluster:")
for feature in categorical_features:
    print(f"\nFeature: {feature}")
    cluster_counts = grouped_sample.groupby(['cluster_label', feature]).size().unstack(fill_value=0)
    cluster_percentages = cluster_counts.apply(lambda x: x / x.sum(), axis=1) * 100
    print(cluster_percentages.round(2)) 
print("\nCluster Sizes:")
print(grouped_sample['cluster_label'].value_counts().sort_index())

pca = PCA(n_components=2)
components = pca.fit_transform(X_processed)

pca_df = pd.DataFrame(data = components, columns = ['PC1', 'PC2'])
pca_df['cluster_label'] = grouped_sample['cluster_label']

fig = px.scatter(
    pca_df,
    x='PC1',
    y='PC2',
    color='cluster_label',
    title=f'{algo_name} Clustering (PCA Reduced Dimensions)',
    labels={'PC1': 'Principal Component 1', 'PC2': 'Principal Component 2'}
)

fig.add_trace(go.Scatter(
    x=centroids_pca[:, 0], y=centroids_pca[:, 1],
    mode='markers',
    marker=dict(size=15, symbol='x', color='black'),
    name='Centroids'
))

#fig.show()
image_path = r"C:\Users\ASUS\OneDrive\Desktop\images\Screenshot 2025-06-20 185853.png"

image = Image.open(image_path)
display(image)

processed_feature_names = numeric_features[:]
preprocessor.fit(grouped_sample) 
ohe_feature_names = preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)
processed_feature_names.extend(ohe_feature_names)

loadings = pd.DataFrame(pca.components_.T, columns=['PC1', 'PC2'], index=processed_feature_names)
print("\nPCA Loadings (Contribution of Original Features to Principal Components):")
print(loadings.to_string())
AgglomerativeClustering - Silhouette Score = 0.0733

--- Cluster Description for AgglomerativeClustering ---

Average Values of Numerical Features per Cluster:
                    temp   humidity  windspeed  ride_duration_min  \
cluster_label                                                       
0              17.410642  64.383970  24.208004          16.279209   
1              15.403571  63.510714  21.771429          86.464286   
2               5.338426  66.964576  12.494463          11.096264   

               cost_category  distance_to_business_area  \
cluster_label                                             
0                   2.314330                   0.018310   
1                   2.535714                   0.292122   
2                   2.378919                   0.013400   

               start_distance_to_shuttle_m  start_distance_to_metro_m  
cluster_label                                                          
0                              1313.107671                 125.703616  
1                             28337.557629                2265.220885  
2                               815.563954                 104.854869  

Distribution of Categorical Features per Cluster:

Feature: rideable_type
rideable_type  classic_bike  electric_bike
cluster_label                             
0                     50.79          49.21
1                     64.29          35.71
2                     47.36          52.64

Feature: member_casual
member_casual  casual  member
cluster_label                
0               50.68   49.32
1               82.14   17.86
2               47.36   52.64

Feature: weather_bin
weather_bin    Cloudy  Rainy  Sunny
cluster_label                      
0               12.50  44.00  43.51
1               25.00  32.14  42.86
2               95.66   1.53   2.80

Feature: start_geohash
start_geohash  dqbvrzn  dqbvx3p  dqbvx7w  dqbvx96  dqbvxc8  dqbvxe9  dqbvxek  \
cluster_label                                                                  
0                 0.00     0.00     0.00     0.00     0.00     0.00     0.00   
1                 3.57     7.14     3.57     3.57     7.14     3.57    14.29   
2                 0.00     0.00     0.00     0.00     0.00     0.00     0.00   

start_geohash  dqbvxeu  dqbvxfe  dqbvxgd  ...  dqcnj5x  dqcnk9e  dqcnke2  \
cluster_label                             ...                              
0                 0.00     0.00     0.00  ...     0.02     0.02     0.02   
1                 3.57     7.14     3.57  ...     0.00     0.00     0.00   
2                 0.00     0.00     0.00  ...     0.00     0.00     0.00   

start_geohash  dqcnkjn  dqcnndr  dqcnndt  dqcnnsm  dqcns19  dqcns48  dqcq35j  
cluster_label                                                                 
0                 0.07     0.00     0.09     0.04     0.02     0.02     0.02  
1                 0.00     0.00     0.00     0.00     0.00     0.00     0.00  
2                 0.00     0.07     0.00     0.07     0.00     0.00     0.00  

[3 rows x 606 columns]

Feature: start_capacity_bin
start_capacity_bin  10–15 $  15–30 $  30–60 $  7–10 $
cluster_label                                        
0                     22.71    65.75    11.47    0.07
1                     82.14    17.86     0.00    0.00
2                     20.01    64.78    15.01    0.20

Cluster Sizes:
cluster_label
0    4473
1      28
2    1499
Name: count, dtype: int64
No description has been provided for this image
PCA Loadings (Contribution of Original Features to Principal Components):
                                      PC1       PC2
temp                         1.473775e-02  0.383722
humidity                    -2.515442e-02 -0.529548
windspeed                    5.267178e-03  0.625646
ride_duration_min            7.275576e-02  0.250793
cost_category                6.636345e-02 -0.264933
distance_to_business_area    6.096214e-01 -0.026776
start_distance_to_shuttle_m  6.227339e-01 -0.025858
start_distance_to_metro_m    4.736822e-01  0.020366
rideable_type_electric_bike -1.449674e-02  0.002489
member_casual_member        -4.160469e-02  0.060349
weather_bin_Rainy           -7.180000e-03 -0.009047
weather_bin_Sunny            1.317847e-02  0.204815
start_geohash_dqbvx3p        2.398170e-03 -0.000365
start_geohash_dqbvx7w        1.081112e-03  0.000072
start_geohash_dqbvx96        1.238706e-03  0.000015
start_geohash_dqbvxc8        2.134598e-03 -0.000409
start_geohash_dqbvxe9        1.194380e-03  0.000014
start_geohash_dqbvxek        4.865869e-03 -0.000163
start_geohash_dqbvxeu        1.221426e-03  0.000229
start_geohash_dqbvxfe        2.015529e-03  0.000190
start_geohash_dqbvxgd        1.049133e-03 -0.000209
start_geohash_dqbvxs5        1.231048e-03 -0.000009
start_geohash_dqbvxsm        2.461707e-03 -0.000091
start_geohash_dqbvxsw        2.396488e-03 -0.000162
start_geohash_dqbvxts        2.633608e-03 -0.000037
start_geohash_dqbvxv0        1.193487e-03 -0.000195
start_geohash_dqbvxzb        2.820167e-03  0.000116
start_geohash_dqchbuk        7.681811e-04 -0.000093
start_geohash_dqchbv1        7.979695e-04 -0.000110
start_geohash_dqchby0        6.562643e-04 -0.000093
start_geohash_dqchbyf        7.080968e-04 -0.000231
start_geohash_dqchtzv        6.710258e-04  0.000129
start_geohash_dqchuch        3.118400e-04  0.000041
start_geohash_dqchv1g        2.512065e-04  0.000097
start_geohash_dqchv37        2.294478e-04  0.000239
start_geohash_dqchv64        2.386256e-04 -0.000244
start_geohash_dqchv8b        2.514165e-04  0.000367
start_geohash_dqchvu3        1.950884e-04 -0.000263
start_geohash_dqchvue        1.758171e-04 -0.000011
start_geohash_dqchvwn        1.566542e-04  0.000147
start_geohash_dqchvy9        1.371230e-04  0.000097
start_geohash_dqchvz1        6.943647e-04  0.000127
start_geohash_dqchvz7        1.345267e-04  0.000323
start_geohash_dqchwpv        4.885834e-04  0.000152
start_geohash_dqchwr8        3.941061e-04 -0.000014
start_geohash_dqchwrg        4.367925e-04 -0.000235
start_geohash_dqchwwg        2.745263e-04 -0.000018
start_geohash_dqchwwn        2.182080e-03 -0.000927
start_geohash_dqchwwy        4.133777e-04 -0.000037
start_geohash_dqchwxr        2.020046e-04 -0.000157
start_geohash_dqchwz3        2.641399e-04 -0.000058
start_geohash_dqchwzs        3.406984e-04  0.000042
start_geohash_dqchwzv        3.019624e-04 -0.000033
start_geohash_dqchxkn        7.947746e-04 -0.000120
start_geohash_dqchxky        1.538005e-03  0.000169
start_geohash_dqchxyb        4.542322e-04 -0.000093
start_geohash_dqchy2q        5.975283e-04 -0.000352
start_geohash_dqchy2w        6.242084e-04  0.000063
start_geohash_dqchy3t        5.814952e-04 -0.000120
start_geohash_dqchy6u        5.074584e-04 -0.000334
start_geohash_dqchy83        4.889999e-04 -0.000102
start_geohash_dqchy87        3.078158e-04 -0.000009
start_geohash_dqchy8m        7.288436e-04 -0.000030
start_geohash_dqchy9g        1.755974e-03 -0.000403
start_geohash_dqchy9t        1.599728e-04 -0.000135
start_geohash_dqchy9y        2.119949e-04  0.000045
start_geohash_dqchyb1        1.118963e-03  0.000665
start_geohash_dqchyb4        7.544982e-04 -0.000040
start_geohash_dqchyc3        5.287096e-04  0.000372
start_geohash_dqchycg        4.168210e-04 -0.000210
start_geohash_dqchydb        4.993339e-04 -0.000098
start_geohash_dqchydx        1.493128e-04 -0.000193
start_geohash_dqchye5        1.042088e-03 -0.000101
start_geohash_dqchyeq        3.027016e-04 -0.000005
start_geohash_dqchyer        1.880741e-04  0.000182
start_geohash_dqchykp        1.467629e-04 -0.000072
start_geohash_dqchymq        2.962826e-04 -0.000236
start_geohash_dqchypz        3.516381e-04  0.000366
start_geohash_dqchyqu        8.731297e-05  0.000072
start_geohash_dqchyr5        8.211161e-05  0.000062
start_geohash_dqchysx        1.640613e-04 -0.000180
start_geohash_dqchytn        5.914669e-04  0.000115
start_geohash_dqchytr        2.270930e-04 -0.000249
start_geohash_dqchyw0        4.976008e-04 -0.000487
start_geohash_dqchyxj        3.478416e-04  0.000133
start_geohash_dqchyxm        3.473857e-04  0.000189
start_geohash_dqchyxs        2.923656e-04 -0.000034
start_geohash_dqchzth        6.739836e-05  0.000151
start_geohash_dqcj0bk        7.473412e-04 -0.000294
start_geohash_dqcj1eu        5.718829e-04  0.000047
start_geohash_dqcj1se        1.169850e-03  0.000037
start_geohash_dqcj1tm        1.115998e-03 -0.000538
start_geohash_dqcj4ey        9.670979e-04 -0.000401
start_geohash_dqcj4s4        4.997773e-04 -0.000121
start_geohash_dqcj4sd        5.189617e-04  0.000010
start_geohash_dqcj4tb        5.234895e-04 -0.000004
start_geohash_dqcj4td        5.025708e-04 -0.000224
start_geohash_dqcj4v8        4.922787e-04 -0.000238
start_geohash_dqcj4wk        1.852601e-03  0.000106
start_geohash_dqcj5j3        4.267466e-04 -0.000131
start_geohash_dqcj5mk        3.585692e-04  0.000050
start_geohash_dqcj5vb        1.012884e-03  0.000476
start_geohash_dqcj5vv        3.185863e-04  0.000185
start_geohash_dqcj5xs        3.073348e-04 -0.000218
start_geohash_dqcj5yg        2.635110e-04 -0.000164
start_geohash_dqcj5ys        1.118831e-03 -0.000275
start_geohash_dqcj5z1        2.756931e-04 -0.000013
start_geohash_dqcj6em        9.891087e-04 -0.000097
start_geohash_dqcj6uz        4.694017e-04  0.000077
start_geohash_dqcj6v1        4.053152e-04 -0.000085
start_geohash_dqcj6v2        8.394460e-04 -0.000364
start_geohash_dqcj6y9        4.856870e-04  0.000037
start_geohash_dqcj6yk        9.489153e-04 -0.000065
start_geohash_dqcj6z6        5.362408e-04 -0.000021
start_geohash_dqcj71s        3.896685e-04 -0.000066
start_geohash_dqcj7jf        3.811973e-04 -0.000203
start_geohash_dqcj845        8.212473e-04 -0.000097
start_geohash_dqcj861        1.878261e-03  0.001363
start_geohash_dqcjhkd        2.389620e-04  0.000119
start_geohash_dqcjhph        5.057113e-04 -0.000297
start_geohash_dqcjhqb        1.217906e-03 -0.000005
start_geohash_dqcjhvj        4.309662e-04  0.000102
start_geohash_dqcjhwf        2.081433e-04  0.000111
start_geohash_dqcjhwp        6.092856e-04 -0.000380
start_geohash_dqcjhzm        2.646957e-04 -0.000071
start_geohash_dqcjj1h        1.660411e-04  0.000125
start_geohash_dqcjj39        1.000524e-03 -0.000039
start_geohash_dqcjj3d        1.365617e-04 -0.000030
start_geohash_dqcjj6p        1.227731e-04 -0.000136
start_geohash_dqcjj7y        1.419961e-04 -0.000031
start_geohash_dqcjj82        5.021413e-05 -0.000044
start_geohash_dqcjj8j        2.680392e-04 -0.000283
start_geohash_dqcjjbh        7.786307e-05 -0.000058
start_geohash_dqcjjdq        1.812868e-04 -0.000084
start_geohash_dqcjjeu        2.680639e-04 -0.000325
start_geohash_dqcjjf9        6.385998e-05 -0.000053
start_geohash_dqcjjfz        6.146228e-05  0.000234
start_geohash_dqcjjgn        1.551323e-04  0.000061
start_geohash_dqcjjjd        2.455368e-04  0.000126
start_geohash_dqcjjjz        1.243995e-04 -0.000050
start_geohash_dqcjjk7        2.213994e-04  0.000237
start_geohash_dqcjjkv        1.535454e-04 -0.000188
start_geohash_dqcjjm2        3.158521e-04  0.000024
start_geohash_dqcjjm5        2.266157e-04 -0.000274
start_geohash_dqcjjmg        8.044290e-04 -0.000134
start_geohash_dqcjjnt        1.158904e-04  0.000165
start_geohash_dqcjjq0        1.449634e-03  0.001529
start_geohash_dqcjjq2        4.102157e-04 -0.000260
start_geohash_dqcjjq6        1.042365e-04 -0.000105
start_geohash_dqcjjqe        5.436979e-04  0.000542
start_geohash_dqcjjqr        6.640794e-04 -0.000411
start_geohash_dqcjjr3        2.101405e-04  0.000180
start_geohash_dqcjjuc        8.770218e-05  0.000053
start_geohash_dqcjjuw        4.531730e-04 -0.000377
start_geohash_dqcjjw9        3.526708e-04 -0.000171
start_geohash_dqcjjwg        4.706690e-04  0.000184
start_geohash_dqcjjwy        6.655027e-04  0.001565
start_geohash_dqcjjxh        1.068689e-03 -0.000237
start_geohash_dqcjjxq        2.274356e-04  0.000017
start_geohash_dqcjjxx        4.607847e-04 -0.000514
start_geohash_dqcjjy0        3.705963e-04 -0.000381
start_geohash_dqcjjy5        2.545475e-04 -0.000554
start_geohash_dqcjjyf        1.276726e-04  0.000103
start_geohash_dqcjjzc        5.894730e-05 -0.000251
start_geohash_dqcjjzh        2.271638e-04 -0.000357
start_geohash_dqcjjzu        2.432277e-04 -0.000126
start_geohash_dqcjjzz        1.592483e-04 -0.000176
start_geohash_dqcjkfm        1.655164e-04 -0.000131
start_geohash_dqcjm1h        3.319840e-04 -0.000226
start_geohash_dqcjm2y        2.541988e-05  0.000101
start_geohash_dqcjm8e        2.929376e-05 -0.000003
start_geohash_dqcjm8x        3.294295e-04 -0.000842
start_geohash_dqcjmb7        9.050119e-05 -0.000049
start_geohash_dqcjmbc        8.724185e-05 -0.000117
start_geohash_dqcjmbg        2.424257e-04 -0.000359
start_geohash_dqcjmbh        1.984614e-04  0.000058
start_geohash_dqcjmbn        1.274706e-05  0.000208
start_geohash_dqcjmcp       -6.393776e-05  0.000202
start_geohash_dqcjmgk       -5.031578e-05  0.000182
start_geohash_dqcjmqq       -2.518678e-05  0.000362
start_geohash_dqcjmss        1.009443e-04 -0.000133
start_geohash_dqcjmu8        8.136443e-05  0.000325
start_geohash_dqcjmze       -9.451252e-07  0.000113
start_geohash_dqcjn05        4.080070e-04 -0.000013
start_geohash_dqcjn2y        4.073061e-05  0.000096
start_geohash_dqcjn3z        2.852370e-04  0.000198
start_geohash_dqcjn56        3.581129e-04  0.000022
start_geohash_dqcjn5t        5.374516e-05 -0.000211
start_geohash_dqcjn6h        4.117995e-04 -0.000282
start_geohash_dqcjn6u        1.667665e-04  0.000121
start_geohash_dqcjn6w        2.595637e-04 -0.000398
start_geohash_dqcjn8j        3.386800e-04 -0.000042
start_geohash_dqcjn8s        1.626415e-04 -0.000013
start_geohash_dqcjn8y        2.549463e-04 -0.000097
start_geohash_dqcjn97        2.536706e-04  0.000102
start_geohash_dqcjn9u        4.417608e-04 -0.000119
start_geohash_dqcjn9w        3.229517e-04 -0.000184
start_geohash_dqcjnbb        9.664149e-05  0.000123
start_geohash_dqcjnd5        4.291358e-04  0.000288
start_geohash_dqcjndf        4.123165e-04  0.000600
start_geohash_dqcjndq        4.998741e-04  0.000197
start_geohash_dqcjne0        8.746006e-05  0.000059
start_geohash_dqcjnem        1.085604e-04 -0.000079
start_geohash_dqcjngj        3.072502e-03  0.000733
start_geohash_dqcjnp7        1.071649e-05 -0.000012
start_geohash_dqcjnpg        1.112874e-05 -0.000322
start_geohash_dqcjnpw        7.260530e-05  0.000160
start_geohash_dqcjnqd        7.180358e-05 -0.000444
start_geohash_dqcjnu4        8.967613e-04  0.000064
start_geohash_dqcjnvz        3.045952e-03  0.001145
start_geohash_dqcjnxq        5.836633e-04  0.002781
start_geohash_dqcjnxy       -9.145158e-04  0.001195
start_geohash_dqcjny8        1.124962e-03  0.000090
start_geohash_dqcjnzq        7.557045e-04  0.001245
start_geohash_dqcjp9n       -1.415618e-05  0.000135
start_geohash_dqcjpdb       -8.133682e-05 -0.000156
start_geohash_dqcjpe6        2.932359e-04  0.000358
start_geohash_dqcjpfw       -8.287191e-06  0.000139
start_geohash_dqcjpjs        6.297792e-04  0.001472
start_geohash_dqcjpjz       -6.935260e-04 -0.000031
start_geohash_dqcjpkf        1.492549e-05  0.000219
start_geohash_dqcjpkt       -2.879410e-04  0.000295
start_geohash_dqcjpm7       -1.664475e-04  0.000181
start_geohash_dqcjpm9       -8.983914e-04  0.000823
start_geohash_dqcjpmm       -1.223862e-03 -0.001725
start_geohash_dqcjpnx       -1.212108e-04  0.000126
start_geohash_dqcjpp6       -2.024898e-03  0.001007
start_geohash_dqcjppj       -1.202162e-04 -0.000333
start_geohash_dqcjppp       -9.826058e-05  0.000633
start_geohash_dqcjppt       -5.561338e-04  0.000755
start_geohash_dqcjpqd       -3.741136e-04  0.000404
start_geohash_dqcjpqj       -9.366069e-04  0.000905
start_geohash_dqcjpqt       -6.297269e-04  0.000183
start_geohash_dqcjpr4       -2.542824e-04 -0.000031
start_geohash_dqcjprj       -5.089604e-04  0.000148
start_geohash_dqcjprv       -7.023958e-04 -0.000454
start_geohash_dqcjps1       -2.953583e-05  0.000127
start_geohash_dqcjps4        6.173270e-05  0.000368
start_geohash_dqcjpsm        4.770236e-04  0.000087
start_geohash_dqcjpt1       -1.233134e-04  0.000589
start_geohash_dqcjptf       -7.222995e-06 -0.000193
start_geohash_dqcjptn       -3.600006e-04  0.000116
start_geohash_dqcjptp       -9.133223e-04 -0.000035
start_geohash_dqcjptq       -4.270749e-04 -0.000549
start_geohash_dqcjptt       -1.348827e-03  0.001097
start_geohash_dqcjpty       -2.926133e-04  0.000342
start_geohash_dqcjpub       -2.371617e-04  0.000240
start_geohash_dqcjpv2       -2.598923e-04  0.000060
start_geohash_dqcjpv3       -5.006423e-04 -0.000090
start_geohash_dqcjpvq       -2.157272e-04 -0.000027
start_geohash_dqcjpvv       -9.989419e-05  0.000238
start_geohash_dqcjpwv       -3.923693e-05  0.000329
start_geohash_dqcjpx0       -5.558488e-04 -0.000318
start_geohash_dqcjpy1        8.111907e-04  0.000598
start_geohash_dqcjpyb       -1.678548e-04 -0.000515
start_geohash_dqcjpyt       -4.805089e-04 -0.001193
start_geohash_dqcjpz0       -5.527860e-04 -0.000180
start_geohash_dqcjpzd       -2.366570e-04  0.000508
start_geohash_dqcjpzh        1.887370e-05  0.000618
start_geohash_dqcjpzt       -3.381302e-04 -0.000152
start_geohash_dqcjq00        3.721463e-05 -0.000107
start_geohash_dqcjq03       -1.085763e-05  0.000151
start_geohash_dqcjq06       -1.131262e-04 -0.000114
start_geohash_dqcjq0q       -6.296098e-05  0.000607
start_geohash_dqcjq0s       -1.341828e-04  0.000187
start_geohash_dqcjq0t        3.473016e-05  0.000239
start_geohash_dqcjq0y       -2.898315e-04  0.000515
start_geohash_dqcjq13       -1.178021e-05 -0.000145
start_geohash_dqcjq1j       -1.066764e-04 -0.000079
start_geohash_dqcjq2c        6.701685e-04  0.000484
start_geohash_dqcjq3z       -1.283652e-05  0.000105
start_geohash_dqcjq4y       -6.611111e-04  0.001446
start_geohash_dqcjq5u       -7.141845e-04  0.001124
start_geohash_dqcjq61       -6.757744e-05  0.000830
start_geohash_dqcjq6d       -1.088247e-03  0.000559
start_geohash_dqcjq6h       -5.979537e-05 -0.001034
start_geohash_dqcjq6n       -2.173926e-04  0.000904
start_geohash_dqcjq7f       -5.353536e-05  0.001180
start_geohash_dqcjq7h       -1.319559e-03 -0.000224
start_geohash_dqcjq8n       -3.197555e-04 -0.000247
start_geohash_dqcjq8y       -4.235341e-04  0.000362
start_geohash_dqcjq91        8.849608e-05  0.000688
start_geohash_dqcjq95       -5.897331e-04 -0.000272
start_geohash_dqcjq9q       -1.198430e-04  0.000618
start_geohash_dqcjq9v       -7.109178e-04 -0.000025
start_geohash_dqcjq9w       -5.377742e-04  0.000398
start_geohash_dqcjqb4       -1.858270e-04  0.000233
start_geohash_dqcjqb6       -6.809104e-05 -0.000076
start_geohash_dqcjqb9       -1.557555e-04  0.000525
start_geohash_dqcjqbb       -3.137713e-04  0.000341
start_geohash_dqcjqbc       -7.443359e-05  0.000138
start_geohash_dqcjqbd       -4.848877e-05  0.000072
start_geohash_dqcjqbk       -1.702803e-04 -0.000206
start_geohash_dqcjqbv       -1.415084e-04 -0.000006
start_geohash_dqcjqc0       -2.590600e-04  0.000672
start_geohash_dqcjqc4       -2.461537e-04  0.000469
start_geohash_dqcjqc8       -8.738265e-04  0.000642
start_geohash_dqcjqcb       -4.097430e-04  0.000020
start_geohash_dqcjqce       -2.850870e-04 -0.000546
start_geohash_dqcjqcf       -4.383317e-04  0.000364
start_geohash_dqcjqcj       -3.557642e-04 -0.000339
start_geohash_dqcjqck       -9.793864e-04  0.000836
start_geohash_dqcjqcy       -4.636061e-04 -0.000576
start_geohash_dqcjqd2       -1.440998e-03 -0.000681
start_geohash_dqcjqd5       -1.114348e-03  0.000285
start_geohash_dqcjqdm       -7.469649e-04 -0.000177
start_geohash_dqcjqdt       -2.029994e-05 -0.000390
start_geohash_dqcjqdx       -1.101118e-03  0.000065
start_geohash_dqcjqen       -6.261968e-04 -0.000129
start_geohash_dqcjqf1       -4.687449e-04  0.000408
start_geohash_dqcjqf5       -4.242070e-04  0.000566
start_geohash_dqcjqfk       -9.172859e-04  0.000546
start_geohash_dqcjqfn       -4.327456e-04  0.001394
start_geohash_dqcjqfp       -1.558052e-04  0.000166
start_geohash_dqcjqft       -4.888955e-04  0.001021
start_geohash_dqcjqfy       -9.956495e-05 -0.000785
start_geohash_dqcjqg1       -1.342450e-03 -0.000597
start_geohash_dqcjqgc       -1.532665e-03 -0.000646
start_geohash_dqcjqge       -4.598635e-04 -0.001256
start_geohash_dqcjqgg       -7.181981e-05 -0.000427
start_geohash_dqcjqgn       -4.683266e-04 -0.000588
start_geohash_dqcjqgq       -1.843417e-03  0.000336
start_geohash_dqcjqgw       -6.484885e-04  0.000301
start_geohash_dqcjqgz       -7.923527e-04  0.000828
start_geohash_dqcjqjd       -2.720006e-04 -0.000103
start_geohash_dqcjqjx        1.864544e-05 -0.000531
start_geohash_dqcjqk8       -4.437244e-04  0.000418
start_geohash_dqcjqn0       -6.086679e-05  0.000243
start_geohash_dqcjqp4       -6.860477e-05 -0.000417
start_geohash_dqcjqpv       -2.413525e-04  0.000841
start_geohash_dqcjqtu       -8.425277e-04 -0.000355
start_geohash_dqcjqtv       -3.477039e-04 -0.000214
start_geohash_dqcjqtx       -1.735766e-04  0.000286
start_geohash_dqcjqu3       -4.686913e-04 -0.000102
start_geohash_dqcjquc       -1.525341e-04 -0.000570
start_geohash_dqcjquq       -1.485822e-03 -0.000006
start_geohash_dqcjquu       -2.356440e-03 -0.002065
start_geohash_dqcjqv6       -6.286043e-04  0.001129
start_geohash_dqcjqve       -1.601024e-03 -0.002590
start_geohash_dqcjqvu       -1.332588e-03 -0.000062
start_geohash_dqcjqx1       -6.005562e-04  0.000327
start_geohash_dqcjqx8       -2.272340e-04 -0.000061
start_geohash_dqcjqyd       -3.458692e-04 -0.000188
start_geohash_dqcjqyr       -1.467377e-03 -0.000474
start_geohash_dqcjqyz       -6.181996e-04  0.000080
start_geohash_dqcjqzq       -9.093878e-04 -0.000052
start_geohash_dqcjqzu        3.890018e-05  0.000167
start_geohash_dqcjr04       -7.423522e-04 -0.000277
start_geohash_dqcjr0e       -4.444542e-04 -0.000569
start_geohash_dqcjr0f       -1.959663e-04  0.000040
start_geohash_dqcjr0r       -1.885087e-04  0.000092
start_geohash_dqcjr0z       -4.145895e-04  0.000221
start_geohash_dqcjr15       -6.254817e-04  0.000057
start_geohash_dqcjr16       -4.557842e-04  0.000960
start_geohash_dqcjr17       -2.320338e-04  0.000372
start_geohash_dqcjr19       -5.734998e-04 -0.000591
start_geohash_dqcjr1c       -5.207606e-04  0.000248
start_geohash_dqcjr1n       -9.473773e-04 -0.000212
start_geohash_dqcjr1p       -3.420158e-04 -0.000083
start_geohash_dqcjr1s       -3.765516e-04  0.000144
start_geohash_dqcjr1t       -5.924234e-04  0.000687
start_geohash_dqcjr1z       -4.381796e-04  0.000348
start_geohash_dqcjr22       -3.024926e-04  0.001308
start_geohash_dqcjr29       -3.297446e-04  0.000400
start_geohash_dqcjr2e       -2.261471e-04  0.000124
start_geohash_dqcjr2w       -1.817512e-04 -0.000045
start_geohash_dqcjr30       -2.316954e-04  0.000290
start_geohash_dqcjr33       -6.823286e-04 -0.000515
start_geohash_dqcjr34       -6.553971e-04  0.000019
start_geohash_dqcjr3h       -2.575163e-04  0.000579
start_geohash_dqcjr3r       -2.436740e-04 -0.000100
start_geohash_dqcjr3s       -4.976425e-04  0.000174
start_geohash_dqcjr40       -3.409280e-04  0.000274
start_geohash_dqcjr45       -7.973528e-04  0.001341
start_geohash_dqcjr49       -2.073745e-04  0.000614
start_geohash_dqcjr4d       -1.233599e-03 -0.000198
start_geohash_dqcjr4j       -3.917400e-04 -0.000532
start_geohash_dqcjr4w       -1.616738e-03 -0.001410
start_geohash_dqcjr53       -2.510312e-03 -0.001107
start_geohash_dqcjr54       -1.752365e-03  0.000411
start_geohash_dqcjr5e       -7.818403e-04 -0.000173
start_geohash_dqcjr5g       -1.876519e-03 -0.002704
start_geohash_dqcjr5h       -9.293022e-04  0.001141
start_geohash_dqcjr5n       -8.736780e-04 -0.001225
start_geohash_dqcjr64       -1.081437e-03 -0.000314
start_geohash_dqcjr6b       -7.405456e-04  0.000623
start_geohash_dqcjr6d       -1.352707e-03 -0.001315
start_geohash_dqcjr6h       -2.013586e-03 -0.000178
start_geohash_dqcjr6z       -7.837822e-04  0.000658
start_geohash_dqcjr71       -1.272294e-03  0.000941
start_geohash_dqcjr7c       -6.840517e-04 -0.000380
start_geohash_dqcjr7v       -5.470992e-04  0.000304
start_geohash_dqcjr80       -1.075503e-03 -0.000333
start_geohash_dqcjr8x       -1.200393e-03 -0.000565
start_geohash_dqcjr91       -1.075878e-03 -0.000147
start_geohash_dqcjr95       -7.041566e-04 -0.001543
start_geohash_dqcjr97       -5.374598e-04 -0.000039
start_geohash_dqcjr99       -6.452289e-04  0.000454
start_geohash_dqcjr9n       -2.586702e-03 -0.003495
start_geohash_dqcjr9r       -9.755865e-04 -0.000683
start_geohash_dqcjr9u       -3.007900e-04 -0.000779
start_geohash_dqcjr9y       -1.813834e-03 -0.000353
start_geohash_dqcjrb5       -3.825163e-04 -0.001013
start_geohash_dqcjrbj       -2.171983e-04 -0.000082
start_geohash_dqcjrbt       -4.113281e-04 -0.000011
start_geohash_dqcjrbz       -5.105964e-04  0.000410
start_geohash_dqcjrc8       -1.209915e-03 -0.001179
start_geohash_dqcjrce       -1.263998e-03  0.000193
start_geohash_dqcjrcn       -7.430203e-04  0.000737
start_geohash_dqcjrct       -7.983318e-04 -0.000028
start_geohash_dqcjrcv       -3.369988e-05  0.000121
start_geohash_dqcjrcx       -2.568467e-04 -0.000321
start_geohash_dqcjrd0       -4.931551e-04  0.000218
start_geohash_dqcjrd1       -2.846111e-04  0.000686
start_geohash_dqcjrdw       -1.505854e-04 -0.001060
start_geohash_dqcjre1        2.669464e-04 -0.000058
start_geohash_dqcjreb       -8.306416e-04  0.000359
start_geohash_dqcjreg       -5.815801e-04 -0.000007
start_geohash_dqcjrew       -4.587242e-04  0.001034
start_geohash_dqcjrf5       -4.795231e-04  0.000055
start_geohash_dqcjrf8       -6.579631e-04 -0.000321
start_geohash_dqcjrfb       -4.202024e-04  0.000270
start_geohash_dqcjrfc       -1.357766e-03 -0.000884
start_geohash_dqcjrff       -1.059754e-03  0.000027
start_geohash_dqcjrfq       -1.015515e-04 -0.000149
start_geohash_dqcjrfu        2.024783e-05 -0.000927
start_geohash_dqcjrg8       -4.055040e-04  0.000273
start_geohash_dqcjrhc       -6.004489e-04 -0.000180
start_geohash_dqcjrhg       -1.561892e-03 -0.001110
start_geohash_dqcjrhn       -7.051932e-04  0.000579
start_geohash_dqcjrhs       -8.603771e-04  0.000028
start_geohash_dqcjrht       -5.267803e-04 -0.000363
start_geohash_dqcjrhx       -6.385425e-04 -0.000518
start_geohash_dqcjrhy       -3.858877e-04 -0.000094
start_geohash_dqcjrj7       -1.743836e-03 -0.000565
start_geohash_dqcjrjb       -5.209804e-04  0.000328
start_geohash_dqcjrjp       -4.031999e-04 -0.000416
start_geohash_dqcjrjw       -4.157110e-04  0.000303
start_geohash_dqcjrk4       -1.271378e-03  0.000034
start_geohash_dqcjrk6       -7.670284e-04 -0.000367
start_geohash_dqcjrkc       -8.114822e-04 -0.000334
start_geohash_dqcjrkw       -3.261063e-04  0.000566
start_geohash_dqcjrmm       -3.062316e-05  0.000431
start_geohash_dqcjrn4       -4.360254e-04 -0.000855
start_geohash_dqcjrn6       -4.177229e-04 -0.000205
start_geohash_dqcjrnd       -9.789955e-04  0.000801
start_geohash_dqcjrnn       -9.491471e-04 -0.001862
start_geohash_dqcjrny       -6.807397e-04 -0.000432
start_geohash_dqcjrp5       -1.759941e-04 -0.000447
start_geohash_dqcjrp6       -3.505790e-04 -0.000114
start_geohash_dqcjrpf       -7.083400e-04  0.000777
start_geohash_dqcjrpj       -5.846667e-04 -0.000116
start_geohash_dqcjrq4       -6.978080e-05 -0.000009
start_geohash_dqcjrq9       -6.190931e-04  0.000239
start_geohash_dqcjrr3       -1.194816e-04 -0.000447
start_geohash_dqcjrs3       -1.182366e-03 -0.001200
start_geohash_dqcjrsh       -2.039854e-04 -0.000592
start_geohash_dqcjrsp       -2.419018e-04  0.000224
start_geohash_dqcjrsy       -1.886550e-04 -0.000081
start_geohash_dqcjruc       -2.821231e-04 -0.000947
start_geohash_dqcjrvb        2.153306e-04  0.000264
start_geohash_dqcjrvm       -7.010644e-05  0.000379
start_geohash_dqcjrvt       -1.233555e-04  0.000594
start_geohash_dqcjrvx       -4.543749e-05 -0.000450
start_geohash_dqcjrwc       -6.377608e-05 -0.000437
start_geohash_dqcjrys       -1.218570e-04 -0.000443
start_geohash_dqcjrzm       -4.232365e-04 -0.000241
start_geohash_dqcjrzr       -1.142773e-04  0.000148
start_geohash_dqcjrzv        2.151125e-05  0.000080
start_geohash_dqcjt2h        3.792841e-05  0.000011
start_geohash_dqcjtb4       -1.773531e-04 -0.001361
start_geohash_dqcjtbd       -1.211754e-04  0.000615
start_geohash_dqcjtfx       -4.906236e-05 -0.000022
start_geohash_dqcjtmr        8.566523e-07 -0.000139
start_geohash_dqcjtu9        6.841160e-05 -0.000093
start_geohash_dqcjtug        7.477923e-05 -0.000005
start_geohash_dqcjtuh       -2.043965e-06  0.000200
start_geohash_dqcjtxt        1.325626e-04  0.000137
start_geohash_dqcjv7d        3.352485e-04  0.000105
start_geohash_dqcjv87        1.343533e-03  0.000105
start_geohash_dqcjv8n        2.227658e-05 -0.000096
start_geohash_dqcjv8r        1.300167e-04  0.000175
start_geohash_dqcjv8v        9.186056e-04  0.000564
start_geohash_dqcjv9f        2.946499e-04 -0.000137
start_geohash_dqcjv9s        2.088087e-04 -0.000082
start_geohash_dqcjvd1        1.532275e-04 -0.000246
start_geohash_dqcjvd3        9.479638e-04  0.000251
start_geohash_dqcjvee        1.314018e-04 -0.000129
start_geohash_dqcjw0e       -7.191469e-05  0.000444
start_geohash_dqcjw14        1.007421e-05 -0.000012
start_geohash_dqcjw16       -1.985892e-05  0.000487
start_geohash_dqcjw1b       -3.583910e-06 -0.000046
start_geohash_dqcjw2p       -2.285386e-04 -0.000288
start_geohash_dqcjw3e       -1.654710e-05 -0.000502
start_geohash_dqcjw3j       -5.428541e-05 -0.000037
start_geohash_dqcjw40       -7.838657e-05  0.000058
start_geohash_dqcjw5x        1.495808e-04  0.000873
start_geohash_dqcjw64       -3.454026e-05 -0.000282
start_geohash_dqcjwcn        1.050396e-04 -0.000262
start_geohash_dqcjwj7        2.991928e-05  0.000411
start_geohash_dqcjwm2        2.836645e-05  0.000243
start_geohash_dqcjx06       -3.377996e-05  0.000602
start_geohash_dqcjx0n        1.899799e-04  0.000355
start_geohash_dqcjx16        7.306319e-05 -0.000050
start_geohash_dqcjx20       -3.897340e-04 -0.000649
start_geohash_dqcjx2k       -1.295446e-04  0.000371
start_geohash_dqcjx30       -5.359563e-05 -0.000165
start_geohash_dqcjx3h       -4.563126e-05 -0.000238
start_geohash_dqcjx44        8.504543e-05  0.000336
start_geohash_dqcjx4w        5.498082e-05 -0.000077
start_geohash_dqcjx5f        1.032321e-04 -0.000242
start_geohash_dqcjx5r        3.475885e-05  0.000273
start_geohash_dqcjx5v        3.104740e-05  0.000191
start_geohash_dqcjx66        1.492303e-04  0.000121
start_geohash_dqcjx7g        6.980985e-05  0.000079
start_geohash_dqcjxbr       -2.309845e-04 -0.000129
start_geohash_dqcjxdb       -4.027363e-05 -0.000211
start_geohash_dqcjxg0        9.320545e-05 -0.001337
start_geohash_dqcjxgb       -3.522698e-05  0.000020
start_geohash_dqcjxhw        3.113568e-04 -0.000041
start_geohash_dqcjxjy        9.582251e-05 -0.000173
start_geohash_dqcjxn1        1.652488e-04  0.000117
start_geohash_dqcjxpq        3.272189e-04  0.000324
start_geohash_dqcjxqn        1.884178e-04  0.000174
start_geohash_dqcjxrn        1.305353e-03  0.000215
start_geohash_dqcjxs0        1.417014e-04 -0.000097
start_geohash_dqcjxut        4.011064e-05 -0.000246
start_geohash_dqcjxx4        2.133402e-04  0.000053
start_geohash_dqcjxxt        1.351975e-04 -0.000055
start_geohash_dqcjxzj        1.938445e-04  0.000126
start_geohash_dqcjyfy        1.523679e-04  0.000071
start_geohash_dqcjz1h        1.869789e-05  0.000005
start_geohash_dqcjz31        1.632493e-04  0.000023
start_geohash_dqcjz4s        1.388619e-04 -0.000096
start_geohash_dqcjz51        1.936096e-04  0.000130
start_geohash_dqcjz5p        2.862590e-04  0.000296
start_geohash_dqcjz6n        2.208044e-04 -0.000073
start_geohash_dqcjz8v        3.436546e-04 -0.000385
start_geohash_dqcjzc6        6.772365e-04  0.000523
start_geohash_dqckbp7        1.540982e-04  0.000195
start_geohash_dqckbqs        1.003093e-04  0.000162
start_geohash_dqckbrk        4.121073e-05  0.000024
start_geohash_dqcm01m        7.103337e-05  0.000031
start_geohash_dqcm04q        3.762562e-05  0.000268
start_geohash_dqcm05d       -2.038756e-05  0.000161
start_geohash_dqcm05m       -1.540153e-05 -0.000154
start_geohash_dqcm0d8        6.458140e-05  0.000124
start_geohash_dqcm0ez        1.134896e-04 -0.000196
start_geohash_dqcm0hp        2.164264e-05 -0.000128
start_geohash_dqcm0hw        7.757184e-05  0.000203
start_geohash_dqcm0ju       -1.576846e-04  0.000494
start_geohash_dqcm0kx        3.228386e-05 -0.000053
start_geohash_dqcm0mx        4.582464e-04  0.000738
start_geohash_dqcm0n3       -2.071447e-04  0.000140
start_geohash_dqcm0n7       -1.330126e-05 -0.000236
start_geohash_dqcm0nb        3.441668e-04 -0.000055
start_geohash_dqcm0ns        3.931534e-04 -0.000137
start_geohash_dqcm0pf       -4.395849e-04 -0.000366
start_geohash_dqcm0pn       -3.077699e-04  0.000250
start_geohash_dqcm0pw       -3.510094e-04 -0.000294
start_geohash_dqcm0q2       -4.647856e-05  0.000258
start_geohash_dqcm0qf       -1.561026e-04  0.000141
start_geohash_dqcm0re       -2.484681e-04  0.000127
start_geohash_dqcm0xy        2.745498e-05 -0.000041
start_geohash_dqcm1r8        6.981231e-05  0.000315
start_geohash_dqcm1w8        1.558219e-04 -0.000160
start_geohash_dqcm20e       -1.938104e-04 -0.000003
start_geohash_dqcm20q       -1.305344e-04 -0.000087
start_geohash_dqcm21d       -1.366017e-04  0.000548
start_geohash_dqcm21n       -4.458867e-04 -0.000340
start_geohash_dqcm21w       -2.354232e-04 -0.000293
start_geohash_dqcm22s       -8.065162e-05 -0.000202
start_geohash_dqcm231       -1.608145e-04 -0.000189
start_geohash_dqcm23c       -3.080371e-04 -0.001027
start_geohash_dqcm245       -4.134351e-05 -0.000107
start_geohash_dqcm24g       -1.517137e-04 -0.000008
start_geohash_dqcm24w       -1.080201e-04  0.000027
start_geohash_dqcm25s        1.720326e-05 -0.000155
start_geohash_dqcm27y        2.365645e-04  0.000723
start_geohash_dqcm28c        2.343347e-05  0.000067
start_geohash_dqcm290       -3.736766e-05  0.000101
start_geohash_dqcm2hq       -3.175573e-05  0.000518
start_geohash_dqcm2ju       -2.197734e-05  0.000047
start_geohash_dqcm2n8       -3.272044e-05 -0.000796
start_geohash_dqcm2p8       -6.428822e-05  0.000318
start_geohash_dqcm2r9       -7.727885e-05 -0.000066
start_geohash_dqcm2rn        1.184983e-04 -0.000160
start_geohash_dqcm2w5        4.276470e-06 -0.000042
start_geohash_dqcm318       -7.864146e-06  0.000046
start_geohash_dqcm375        1.231010e-04  0.000121
start_geohash_dqcm5z9        4.403056e-04  0.000312
start_geohash_dqcm737        4.321733e-04  0.000024
start_geohash_dqcm813        8.578056e-06 -0.000201
start_geohash_dqcm830        3.642437e-05 -0.000224
start_geohash_dqcm839        4.561588e-05 -0.000020
start_geohash_dqcm8be        8.694923e-05 -0.000029
start_geohash_dqcm8e9        4.448160e-04  0.000411
start_geohash_dqcm8er        2.569978e-04 -0.000407
start_geohash_dqcm8q8        1.780363e-04 -0.000019
start_geohash_dqcm8v7        2.765322e-04 -0.000208
start_geohash_dqcm8y5        1.658995e-04  0.000069
start_geohash_dqcm95t        1.528562e-04 -0.000232
start_geohash_dqcm9h3        1.495233e-04  0.000165
start_geohash_dqcm9kc        5.071821e-04  0.000457
start_geohash_dqcm9np        2.072013e-04 -0.000047
start_geohash_dqcmfeg        4.021840e-04  0.000168
start_geohash_dqcnhuw        1.065863e-03 -0.000245
start_geohash_dqcnj5x        3.568539e-04 -0.000212
start_geohash_dqcnk9e        4.937358e-04 -0.000018
start_geohash_dqcnke2        4.788696e-04 -0.000036
start_geohash_dqcnkjn        1.496524e-03 -0.000410
start_geohash_dqcnndr        2.004154e-04 -0.000139
start_geohash_dqcnndt        1.042013e-03 -0.000305
start_geohash_dqcnnsm        8.840180e-04 -0.000335
start_geohash_dqcns19        5.643528e-04  0.000132
start_geohash_dqcns48        6.884501e-04  0.000061
start_geohash_dqcq35j        4.640535e-04 -0.000229
start_capacity_bin_15–30 $  -5.198665e-02  0.010980
start_capacity_bin_30–60 $  -2.088911e-02 -0.007563
start_capacity_bin_7–10 $    4.112017e-04 -0.000407

المهمة السرية:

In [ ]:
def haversine_distance(lat1, lon1, lat2, lon2):
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    r = 6371  
    return c * r

df['distance_km'] = df.apply(
    lambda row: haversine_distance(row['start_lat_y'], row['start_lng_y'], row['end_lat_y'], row['end_lng_y']),
    axis=1
)

if 'ride_duration_min' not in df.columns:
    df['started_at'] = pd.to_datetime(df['started_at'])
    df['ended_at'] = pd.to_datetime(df['ended_at'])
    df['ride_duration_min'] = (df['ended_at'] - df['started_at']).dt.total_seconds() / 60

df['min_per_km'] = df['ride_duration_min'] / df['distance_km']
In [200]:
df = df.replace([np.inf, -np.inf], np.nan)
df['min_per_km'].value_counts()
Out[200]:
min_per_km
4.345489     432
4.116779     426
4.574199     402
4.231134     376
4.802909     375
            ... 
16.338615      1
6.870589       1
91.682762      1
14.062801      1
6.114291       1
Name: count, Length: 2108775, dtype: int64
In [201]:
df_clean = df[(df['min_per_km'] >= 1) & (df['min_per_km'] <= 20)]
classic_bike_business = df_clean[
    (df_clean['rideable_type'] == 'classic_bike') &
    (df_clean['business_area_fee'] > 0)
]


electric_bike_business = df_clean[
    (df_clean['rideable_type'] == 'electric_bike') &
    (df_clean['business_area_fee'] > 0)
]


print("Classic Bike in business area:\n", classic_bike_business['min_per_km'].describe())
print("\nElectric Bike in business area:\n", electric_bike_business['min_per_km'].describe())
Classic Bike in business area:
 count    872661.000000
mean          7.638109
std           3.134692
min           1.988944
25%           5.499772
50%           6.743477
75%           8.764316
max          19.999630
Name: min_per_km, dtype: float64

Electric Bike in business area:
 count    453255.000000
mean          5.594365
std           2.575858
min           2.011350
25%           4.000943
50%           4.882934
75%           6.243582
max          19.999209
Name: min_per_km, dtype: float64
In [202]:
classic_bike_business = df_clean[
    (df_clean['rideable_type'] == 'classic_bike') &
    (df_clean['business_area_fee'] == 0)
]


electric_bike_business = df_clean[
    (df_clean['rideable_type'] == 'electric_bike') &
    (df_clean['business_area_fee'] == 0)
]


print("Classic Bike not in business area:\n", classic_bike_business['min_per_km'].describe())
print("\nElectric Bike not in business area:\n", electric_bike_business['min_per_km'].describe())
Classic Bike not in business area:
 count    1.532502e+06
mean     6.951044e+00
std      2.695855e+00
min      1.551741e+00
25%      5.193204e+00
50%      6.260399e+00
75%      7.840503e+00
max      1.999947e+01
Name: min_per_km, dtype: float64

Electric Bike not in business area:
 count    1.141262e+06
mean     5.252941e+00
std      2.230270e+00
min      1.140689e+00
25%      3.883273e+00
50%      4.702220e+00
75%      5.882012e+00
max      1.999955e+01
Name: min_per_km, dtype: float64
In [ ]:
df_trips = df.merge(df_station[['name', 'inside_dc']], left_on='start_station_name', right_on='name', how='left')
df_trips.rename(columns={'inside_dc': 'start_inside_dc'}, inplace=True)
df_trips.drop('name', axis=1, inplace=True)

df_trips = df_trips.merge(df_station[['name', 'inside_dc']], left_on='end_station_name', right_on='name', how='left')
df_trips.rename(columns={'inside_dc': 'end_inside_dc'}, inplace=True)
df_trips.drop('name', axis=1, inplace=True)

outside_dc_trips = df_trips[(df_trips['start_inside_dc'] == False) | (df_trips['end_inside_dc'] == False)]
count_outside_dc = len(outside_dc_trips)
print(f"عدد الرحلات التي بدايتها أو نهايتها خارج DC: {count_outside_dc}")
عدد الرحلات التي بدايتها أو نهايتها خارج DC: 738594
In [204]:
cross_boundary_trips = df_trips[
    (
        ((df_trips['start_inside_dc'] == False) & (df_trips['end_inside_dc'] == True)) |
        ((df_trips['start_inside_dc'] == True) & (df_trips['end_inside_dc'] == False))
    )
    &
    (df_trips['trip_in_business_area'] == False)
]

print(f"عدد الرحلات التي عبرت الحدود بين داخل وخارج DC ولم تنتهِ في منطقة تجارية: {len(cross_boundary_trips):,}")
عدد الرحلات التي عبرت الحدود بين داخل وخارج DC ولم تنتهِ في منطقة تجارية: 136,945
In [205]:
bike_type_counts = cross_boundary_trips['rideable_type'].value_counts()

print("توزيع نوع الدراجة للرحلات خارج DC:")
print(bike_type_counts)
توزيع نوع الدراجة للرحلات خارج DC:
rideable_type
classic_bike     77991
electric_bike    58954
Name: count, dtype: Int64
In [ ]:
long_valid_rides = df_trips[(df_trips['distance_km'] >10)]

ride_type_counts = long_valid_rides['rideable_type'].value_counts()
ride_type_percent = ride_type_counts / ride_type_counts.sum() * 100


print("\ ازا مسافة 10 كيلومترnالنسبة المئوية لكل نوع:")
print(ride_type_percent)
\ ازا مسافة 10 كيلومترnالنسبة المئوية لكل نوع:
rideable_type
electric_bike    51.440212
classic_bike     48.559788
Name: count, dtype: Float64
In [ ]:
long_valid_rides = df_trips[(df_trips['distance_km'] <1.5)]
ride_type_counts = long_valid_rides['rideable_type'].value_counts()
ride_type_percent = ride_type_counts / ride_type_counts.sum() * 100


print("\ ازا مسافة 2 كيلومترnالنسبة المئوية لكل نوع:")
print(ride_type_percent)
\ ازا مسافة 2 كيلومترnالنسبة المئوية لكل نوع:
rideable_type
classic_bike     68.607614
electric_bike    31.392386
Name: count, dtype: Float64
In [ ]:
closest_stations = []

for idx, row in df_station.iterrows():
    station_id = row['id']
    station_name = row['name']
    coord1 = (row['lat'], row['lng'])

    df_others = df_station[df_station['id'] != station_id].copy()
    df_others['distance_km'] = df_others.apply(
        lambda x: geodesic(coord1, (x['lat'], x['lng'])).km, axis=1)


    nearest = df_others.loc[df_others['distance_km'].idxmin()]
    closest_stations.append({
        'station_id': station_id,
        'station_name': station_name,
        'closest_station_id': nearest['id'],
        'closest_station_name': nearest['name'],
        'distance_km': nearest['distance_km']
    })

df_closest = pd.DataFrame(closest_stations)
In [209]:
print(df_closest['distance_km'].describe())
count    819.000000
mean       0.396832
std        0.291111
min        0.001051
25%        0.213340
50%        0.316546
75%        0.490054
max        3.399025
Name: distance_km, dtype: float64
In [210]:
df_closest_under_600m = df_closest[df_closest['distance_km'] < 0.6]
In [ ]:
df_links = df_trips[['start_station_name', 'end_station_name', 'ride_duration_min']]
df_merge = pd.merge(df_closest_under_600m, df_links,
                    left_on=['station_name', 'closest_station_name'],
                    right_on=['start_station_name', 'end_station_name'],
                    how='left')
In [ ]:
df_with_trips = df_merge.dropna(subset=['ride_duration_min'])
duration_summary = df_with_trips.groupby(
    ['station_name', 'closest_station_name']
)['ride_duration_min'].agg(['count', 'mean', 'median', 'std']).reset_index()

print(duration_summary.head())
          station_name       closest_station_name  count       mean  median  \
0       10th & e st nw             11th & f st nw     87   7.926437     2.2   
1       10th & g st nw             11th & f st nw     29  35.986207    14.8   
2       10th & h st ne              8th & h st ne    136   7.883824     1.4   
3       10th & k st nw  12th st & new york ave nw     71  16.287324     3.8   
4  10th & monroe st ne         7th & monroe st ne    144  11.078472     2.0   

         std  
0  15.379687  
1  48.075757  
2  20.452149  
3  27.329706  
4  24.098403  
In [213]:
len(df_with_trips)
#69740
len(df)
#6103742
Out[213]:
6103742
In [214]:
us_holidays = [
    datetime.date(2024, 1, 1),   # New Year's Day
    datetime.date(2024, 1, 15),  # Martin Luther King Jr. Day
    datetime.date(2024, 2, 19),  # Presidents' Day
    datetime.date(2024, 5, 27),  # Memorial Day
    datetime.date(2024, 6, 19),  # Juneteenth
    datetime.date(2024, 7, 4),   # Independence Day
    datetime.date(2024, 9, 2),   # Labor Day
    datetime.date(2024, 10, 14), # Columbus Day
    datetime.date(2024, 11, 11), # Veterans Day
    datetime.date(2024, 11, 28), # Thanksgiving Day
    datetime.date(2024, 12, 25), # Christmas Day
]
In [215]:
df['is_holiday'] = df['start_date'].isin(us_holidays)
holiday_usage = df['is_holiday'].value_counts()
print("عدد الرحلات في أيام العطل مقابل الأيام العادية")
print(holiday_usage)
عدد الرحلات في أيام العطل مقابل الأيام العادية
is_holiday
False    5959025
True      144717
Name: count, dtype: int64
In [216]:
avg_duration_by_holiday = df.groupby('is_holiday')['ride_duration_min'].mean()
print("\nمتوسط مدة الرحلة في أيام العطل مقابل الأيام العادية")
print(avg_duration_by_holiday)
متوسط مدة الرحلة في أيام العطل مقابل الأيام العادية
is_holiday
False    16.336733
True     19.578255
Name: ride_duration_min, dtype: float64
In [217]:
holiday_users = df[df['is_holiday'] == True]['member_casual'].value_counts(normalize=True)
print("\nنسبة العضو والزائر في أيام العطل")
print(holiday_users)
نسبة العضو والزائر في أيام العطل
member_casual
member    0.609963
casual    0.390037
Name: proportion, dtype: Float64
In [219]:
locations_df['STATION_ID'] = locations_df['STATION_ID'].astype(str).str.strip()
locations_df['is_active'] = locations_df['STATION_STATUS'].str.lower().eq('active')
inactive_station_ids = locations_df[~locations_df['is_active']]['STATION_ID'].unique()

print("عدد المحطات غير الفعالة:", len(inactive_station_ids))
print(inactive_station_ids)
عدد المحطات غير الفعالة: 4
['08249e81-1f3f-11e7-bf6b-3863bb334450'
 '0824eb1b-1f3f-11e7-bf6b-3863bb334450'
 '0826547e-1f3f-11e7-bf6b-3863bb334450'
 'b8b6903d-fabc-48aa-af9b-2c5fb29344b9']
In [220]:
df['start_station_name_clean'] = df['start_station_name'].astype(str).str.strip().str.lower()
locations_df['NAME_clean'] = locations_df['NAME'].astype(str).str.strip().str.lower()

usage_vs_status = (
    df.merge(
        locations_df[['NAME_clean', 'is_active']],
        left_on='start_station_name_clean',
        right_on='NAME_clean',
        how='left'
    )
    .groupby('is_active')['ride_id']
    .count()
)

print("\n rides based on station status: ")
print(usage_vs_status)
 rides based on station status: 
is_active
False      30684
True     4771034
Name: ride_id, dtype: int64
In [222]:
df['start_station_name_clean'] = df['start_station_name'].astype(str).str.strip().str.lower()
locations_df['NAME_clean'] = locations_df['NAME'].astype(str).str.strip().str.lower()
inactive_names = locations_df[~locations_df['is_active']]['NAME_clean'].unique()
inactive_trips = df[df['start_station_name_clean'].isin(inactive_names)]
inactive_trips['started_at'] = pd.to_datetime(inactive_trips['started_at'])
monthly_counts = inactive_trips.resample('M', on='started_at')['ride_id'].count()

print("number of rides that are from in active sations ")
print(monthly_counts)
number of rides that are from in active sations 
started_at
2024-01-31    1148
2024-02-29    1523
2024-03-31    1960
2024-04-30    2420
2024-05-31    2392
2024-06-30    2951
2024-07-31    2987
2024-08-31    2923
2024-09-30    3314
2024-10-31    4536
2024-11-30    3143
2024-12-31    1387
Freq: M, Name: ride_id, dtype: int64
C:\Users\asus\AppData\Local\Temp\ipykernel_12476\988411189.py:5: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [ ]: